Commit d02f00cc authored by Mark Fasheh's avatar Mark Fasheh Committed by Joel Becker
Browse files

ocfs2: allocation reservations



This patch improves Ocfs2 allocation policy by allowing an inode to
reserve a portion of the local alloc bitmap for itself. The reserved
portion (allocation window) is advisory in that other allocation
windows might steal it if the local alloc bitmap becomes
full. Otherwise, the reservations are honored and guaranteed to be
free. When the local alloc window is moved to a different portion of
the bitmap, existing reservations are discarded.

Reservation windows are represented internally by a red-black
tree. Within that tree, each node represents the reservation window of
one inode. An LRU of active reservations is also maintained. When new
data is written, we allocate it from the inode's window. When all bits
in a window are exhausted, we allocate a new one as close to the
previous one as possible. Should we not find free space, an existing
reservation is pulled off the LRU and cannibalized.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
parent ec20cec7
......@@ -80,3 +80,6 @@ user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
acl Enables POSIX Access Control Lists support.
noacl (*) Disables POSIX Access Control Lists support.
resv_level=4 (*) Set how aggressive allocation reservations will be.
Valid values range from 0 (reservations off) to 8
(maximum space for reservations).
......@@ -29,6 +29,7 @@ ocfs2-objs := \
mmap.o \
namei.o \
refcounttree.o \
reservations.o \
resize.o \
slot_map.o \
suballoc.o \
......
......@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
define_mask(RESERVATIONS),
};
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
......
......@@ -119,6 +119,7 @@
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
......
......@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
struct ocfs2_dinode *alloc,
u32 numbits);
u32 *numbits,
struct ocfs2_alloc_reservation *resv);
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
......@@ -262,6 +263,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
osb->local_alloc_state = OCFS2_LA_DISABLED;
ocfs2_resmap_uninit(&osb->osb_la_resmap);
main_bm_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
......@@ -493,7 +496,7 @@ static int ocfs2_local_alloc_in_range(struct inode *inode,
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, NULL);
if (start == -1) {
mlog_errno(-ENOSPC);
return 0;
......@@ -659,7 +662,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
ac->ac_resv);
if (start == -1) {
/* TODO: Shouldn't we just BUG here? */
status = -ENOSPC;
......@@ -669,8 +673,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
bitmap = la->la_bitmap;
*bit_off = le32_to_cpu(la->la_bm_off) + start;
/* local alloc is always contiguous by nature -- we never
* delete bits from it! */
*num_bits = bits_wanted;
status = ocfs2_journal_access_di(handle,
......@@ -682,6 +684,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
goto bail;
}
ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
bits_wanted);
while(bits_wanted--)
ocfs2_set_bit(start++, bitmap);
......@@ -711,13 +716,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
}
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
struct ocfs2_dinode *alloc,
u32 numbits)
struct ocfs2_dinode *alloc,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
int numfound, bitoff, left, startoff, lastzero;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
mlog_entry("(numbits wanted = %u)\n", numbits);
mlog_entry("(numbits wanted = %u)\n", *numbits);
if (!alloc->id1.bitmap1.i_total) {
mlog(0, "No bits in my window!\n");
......@@ -725,6 +734,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
goto bail;
}
if (!resv) {
local_resv = 1;
ocfs2_resv_init_once(&r);
ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
resv = &r;
}
numfound = *numbits;
if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
if (numfound < *numbits)
*numbits = numfound;
goto bail;
}
/*
* Code error. While reservations are enabled, local
* allocation should _always_ go through them.
*/
BUG_ON(osb->osb_resv_level != 0);
/*
* Reservations are disabled. Handle this the old way.
*/
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
numfound = bitoff = startoff = 0;
......@@ -750,7 +783,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
startoff = bitoff+1;
}
/* we got everything we needed */
if (numfound == numbits) {
if (numfound == *numbits) {
/* mlog(0, "Found it all!\n"); */
break;
}
......@@ -759,12 +792,18 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
numfound);
if (numfound == numbits)
if (numfound == *numbits) {
bitoff = startoff - numfound;
else
*numbits = numfound;
} else {
numfound = 0;
bitoff = -1;
}
bail:
if (local_resv)
ocfs2_resv_discard(resmap, resv);
mlog_exit(bitoff);
return bitoff;
}
......@@ -1087,6 +1126,9 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
le16_to_cpu(la->la_size));
ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
mlog(0, "New window allocated:\n");
mlog(0, "window la_bm_off = %u\n",
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
......
......@@ -47,6 +47,7 @@
/* For struct ocfs2_blockcheck_stats */
#include "blockcheck.h"
#include "reservations.h"
/* Caching of metadata buffers */
......@@ -349,6 +350,10 @@ struct ocfs2_super
u64 la_last_gd;
struct ocfs2_reservation_map osb_la_resmap;
unsigned int osb_resv_level;
/* Next three fields are for local node slot recovery during
* mount. */
int dirty;
......
This diff is collapsed.
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* reservations.h
*
* Allocation reservations function prototypes and structures.
*
* Copyright (C) 2010 Novell. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_RESERVATIONS_H
#define OCFS2_RESERVATIONS_H
#include <linux/rbtree.h>
/*
* Default and bounds for the "resv_level" mount option. A level of
* OCFS2_MIN_RESV_LEVEL (0) turns reservations off. The mount option
* parser accepts a value only when it is >= OCFS2_MIN_RESV_LEVEL and
* strictly less than OCFS2_MAX_RESV_LEVEL, so the highest selectable
* level is OCFS2_MAX_RESV_LEVEL - 1.
*/
#define OCFS2_DEFAULT_RESV_LEVEL 4
#define OCFS2_MAX_RESV_LEVEL 9
#define OCFS2_MIN_RESV_LEVEL 0
/*
* A single allocation reservation (window) within a reservation map.
* Reservations are kept in a red-black tree and on an LRU list; see
* struct ocfs2_reservation_map.
*/
struct ocfs2_alloc_reservation {
struct rb_node r_node; /* Node in the reservations rbtree */
unsigned int r_start; /* Beginning of current window */
unsigned int r_len; /* Length of the window */
unsigned int r_last_len; /* Length of most recent alloc */
unsigned int r_last_start; /* Start of most recent alloc */
struct list_head r_lru; /* LRU list head */
unsigned int r_flags; /* OCFS2_RESV_FLAG_* bits */
};
/* Bits stored in ocfs2_alloc_reservation.r_flags. */
#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of an rbtree */
#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
* destroyed immediately after use */
/*
* Tracks the set of reservations made against one disk bitmap.
* struct ocfs2_super embeds one of these (osb_la_resmap) for the local
* alloc bitmap.
*/
struct ocfs2_reservation_map {
struct rb_root m_reservations; /* Root of the reservations rbtree */
char *m_disk_bitmap; /* Disk bitmap the reservations refer to */
struct ocfs2_super *m_osb;
/* The following are not initialized to meaningful values until a disk
* bitmap is provided. */
u32 m_bitmap_len; /* Number of valid
* bits available */
struct list_head m_lru; /* LRU of reservations
* structures. */
};
/*
* One-time initialization of a reservation structure. Callers run this
* on @resv before handing it to any other reservations function.
*/
void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
/* Mask of the r_flags bits that describe a reservation's type. */
#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP)
/*
* Set the type of @resv from @flags (e.g. OCFS2_RESV_FLAG_TMP).
* NOTE(review): @flags is presumably expected to be a subset of
* OCFS2_RESV_TYPES -- confirm against the implementation.
*/
void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
unsigned int flags);
/**
* ocfs2_resv_discard() - truncate a reservation
* @resmap: the reservation map that @resv was used with
* @resv: the reservation to truncate.
*
* After this function is called, the reservation will be empty, and
* unlinked from the rbtree.
*/
void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv);
/**
* ocfs2_resmap_init() - Initialize fields of a reservations bitmap
* @osb: the ocfs2 super block structure that owns @resmap
* @resmap: struct ocfs2_reservation_map to initialize
*
* Returns 0 on success. Only possible return value other than '0' is
* -ENOMEM for failure to allocate the mirror bitmap.
*
* NOTE(review): earlier revisions of this comment described @obj, @ops
* and @max_bitmap_bytes parameters which are not part of this
* prototype; confirm the -ENOMEM case still applies.
*/
int ocfs2_resmap_init(struct ocfs2_super *osb,
struct ocfs2_reservation_map *resmap);
/**
* ocfs2_resmap_restart() - "restart" a reservation bitmap
* @resmap: reservations bitmap
* @clen: Number of valid bits in the bitmap
* @disk_bitmap: the disk bitmap this resmap should refer to.
*
* Re-initialize the parameters of a reservation bitmap. This is
* useful for local alloc window slides.
*
* This function will call ocfs2_trunc_resv against all existing
* reservations. A future version will recalculate existing
* reservations based on the new bitmap.
*
* NOTE(review): ocfs2_trunc_resv is not declared in this header;
* presumably ocfs2_resv_discard() is meant -- confirm.
*/
void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
unsigned int clen, char *disk_bitmap);
/**
* ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
* @resmap: the struct ocfs2_reservation_map to uninitialize
*
* Called for the local alloc reservation map when local alloc is shut
* down.
*/
void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
/**
* ocfs2_resmap_resv_bits() - Return still-valid reservation bits
* @resmap: reservations bitmap
* @resv: reservation to base search from
* @cstart: start of proposed allocation (set on success)
* @clen: length (in clusters) of proposed allocation. Callers pass in
*        the length they want; on success it holds the length that is
*        actually usable, which may be smaller than requested.
*
* Using the reservation data from resv, this function will compare
* resmap and resmap->m_disk_bitmap to determine what part (if any) of
* the reservation window is still clear to use. If resv is empty,
* this function will try to allocate a window for it.
*
* On success, zero is returned and the valid allocation area is set in cstart
* and clen.
*
* Returns -ENOSPC if reservations are disabled.
*/
int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv,
int *cstart, int *clen);
/**
* ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
* @resmap: reservations bitmap
* @resv: optional reservation to recalculate based on new bitmap
* @cstart: start of allocation in clusters
* @clen: length of allocation in clusters.
*
* Tell the reservation code that bits were used to fulfill allocation in
* resmap. The bits don't have to have been part of any existing
* reservation. But we must always call this function when bits are claimed.
* Internally, the reservations code will use this information to mark the
* reservations bitmap. If resv is passed, its next allocation window will be
* calculated.
*/
void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv,
u32 cstart, u32 clen);
#endif /* OCFS2_RESERVATIONS_H */
......@@ -54,6 +54,8 @@ struct ocfs2_alloc_context {
u64 ac_last_group;
u64 ac_max_block; /* Highest block number to allocate. 0 is
is the same as ~0 - unlimited */
struct ocfs2_alloc_reservation *ac_resv;
};
void ocfs2_init_steal_slots(struct ocfs2_super *osb);
......
......@@ -95,6 +95,7 @@ struct mount_options
unsigned int atime_quantum;
signed short slot;
unsigned int localalloc_opt;
unsigned int resv_level;
char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
};
......@@ -176,6 +177,7 @@ enum {
Opt_noacl,
Opt_usrquota,
Opt_grpquota,
Opt_resv_level,
Opt_err,
};
......@@ -202,6 +204,7 @@ static const match_table_t tokens = {
{Opt_noacl, "noacl"},
{Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"},
{Opt_resv_level, "resv_level=%u"},
{Opt_err, NULL}
};
......@@ -1030,6 +1033,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
osb->osb_resv_level = parsed_options.resv_level;
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
......@@ -1290,6 +1294,7 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->slot = OCFS2_INVALID_SLOT;
mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
mopt->cluster_stack[0] = '\0';
mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
if (!options) {
status = 1;
......@@ -1433,6 +1438,17 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
break;
case Opt_resv_level:
if (is_remount)
break;
if (match_int(&args[0], &option)) {
status = 0;
goto bail;
}
if (option >= OCFS2_MIN_RESV_LEVEL &&
option < OCFS2_MAX_RESV_LEVEL)
mopt->resv_level = option;
break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
......@@ -1514,6 +1530,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
else
seq_printf(s, ",noacl");
if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
return 0;
}
......@@ -2042,6 +2061,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
init_waitqueue_head(&osb->osb_mount_event);
status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
if (status) {
mlog_errno(status);
goto bail;
}
osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
if (!osb->vol_label) {
mlog(ML_ERROR, "unable to alloc vol label\n");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment