Commit e50e5129 authored by Andreas Dilger's avatar Andreas Dilger Committed by Theodore Ts'o
Browse files

ext4: xattr-in-inode support

Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.

If the size of an xattr value is larger than will fit in a single
external block, then the xattr value will be saved into the body
of an external xattr inode.

The also helps support a larger number of xattr, since only the headers
will be stored in the in-inode space or the single external block.

The inode is referenced from the xattr header via "e_value_inum",
which was formerly "e_value_block", but that field was never used.
The e_value_size still contains the xattr size so that listing
xattrs does not need to look up the inode if the data is not accessed.

struct ext4_xattr_entry {
        __u8    e_name_len;     /* length of name */
        __u8    e_name_index;   /* attribute name index */
        __le16  e_value_offs;   /* offset in disk block of value */
        __le32  e_value_inum;   /* inode in which value is stored */
        __le32  e_value_size;   /* size of attribute value */
        __le32  e_hash;         /* hash value of name and value */
        char    e_name[0];      /* attribute name */
};

The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
holds a back-reference to the owning inode in its i_mtime field,
allowing the ext4/e2fsck to verify the correct inode is accessed.

[ Applied fix by Dan Carpenter to avoid freeing an ERR_PTR. ]

Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424

Signed-off-by: default avatarKalpak Shah <kalpak.shah@sun.com>
Signed-off-by: default avatarJames Simmons <uja.ornl@gmail.com>
Signed-off-by: default avatarAndreas Dilger <andreas.dilger@intel.com>
Signed-off-by: default avatarTahsin Erdogan <tahsin@google.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Signed-off-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
parent e08ac99f
......@@ -1797,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
......@@ -2230,6 +2231,12 @@ struct mmpd_data {
*/
#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
/*
* Maximum size of xattr attributes for FEATURE_INCOMPAT_EA_INODE 1Mb
* This limit is arbitrary, but is reasonable for the xattr API.
*/
#define EXT4_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024)
/*
* Function prototypes
*/
......@@ -2242,6 +2249,10 @@ struct mmpd_data {
# define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn,
struct ext4_xattr_ino_array {
unsigned int xia_count; /* # of used item in the array */
unsigned int xia_inodes[0];
};
/* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
......@@ -2489,6 +2500,7 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
......
......@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
......
......@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_block && entry->e_value_size) {
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
......
......@@ -139,8 +139,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
/*
* Test whether an inode is a fast symlink.
......@@ -189,6 +187,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
int extra_credits = 3;
struct ext4_xattr_ino_array *lea_ino_array = NULL;
trace_ext4_evict_inode(inode);
......@@ -238,8 +238,8 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode)+3);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
......@@ -251,9 +251,36 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
goto no_delete;
}
if (IS_SYNC(inode))
ext4_handle_sync(handle);
/*
* Delete xattr inode before deleting the main inode.
*/
err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
if (err) {
ext4_warning(inode->i_sb,
"couldn't delete inode's xattr (err %d)", err);
goto stop_handle;
}
if (!IS_NOQUOTA(inode))
extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
if (!ext4_handle_has_enough_credits(handle,
ext4_blocks_for_truncate(inode) + extra_credits)) {
err = ext4_journal_extend(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err > 0)
err = ext4_journal_restart(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
goto stop_handle;
}
}
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
......@@ -277,10 +304,10 @@ void ext4_evict_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
if (!ext4_handle_has_enough_credits(handle, 3)) {
err = ext4_journal_extend(handle, 3);
if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
err = ext4_journal_extend(handle, extra_credits);
if (err > 0)
err = ext4_journal_restart(handle, 3);
err = ext4_journal_restart(handle, extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
......@@ -315,8 +342,12 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode);
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
if (lea_ino_array != NULL)
ext4_xattr_inode_array_free(inode, lea_ino_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
......@@ -5504,7 +5535,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
*
* Also account for superblock, inode, quota and xattr blocks
*/
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
......
This diff is collapsed.
......@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
__le32 e_value_block; /* disk block attribute is stored on (n/i) */
__le32 e_value_inum; /* inode in which the value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
......@@ -69,6 +69,26 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
/*
* Link EA inode back to parent one using i_mtime field.
* Extra integer type conversion added to ignore higher
* bits in i_mtime.tv_sec which might be set by ext4_get()
*/
#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
do { \
(inode)->i_mtime.tv_sec = inum; \
} while(0)
#define EXT4_XATTR_INODE_GET_PARENT(inode) \
((__u32)(inode)->i_mtime.tv_sec)
/*
* The minimum size of EA value when you start storing it in an external inode
* size of block - size of header - size of 1 entry - 4 null bytes
*/
#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
......@@ -77,10 +97,11 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
struct ext4_xattr_info {
int name_index;
const char *name;
const void *value;
size_t value_len;
int name_index;
int in_inode;
};
struct ext4_xattr_search {
......@@ -140,7 +161,13 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
int *err);
extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_ino_array **array);
extern void ext4_xattr_inode_array_free(struct inode *inode,
struct ext4_xattr_ino_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment