Commit 63a42e1a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Several fixes to recent release (4.19, fixes tagged for stable) and
  other fixes"

* tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix missing delayed iputs on unmount
  Btrfs: fix data corruption due to cloning of eof block
  Btrfs: fix infinite loop on inode eviction after deduplication of eof block
  Btrfs: fix deadlock on tree root leaf when finding free extent
  btrfs: avoid link error with CONFIG_NO_AUTO_INLINE
  btrfs: tree-checker: Fix misleading group system information
  Btrfs: fix missing data checksums after a ranged fsync (msync)
  btrfs: fix pinned underflow after transaction aborted
  Btrfs: fix cur_offset in the error case for nocow
parents c140f8b0 d6fd0ae2
......@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new,
struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *was_new);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
......
......@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_fs_info *fs_info = root->fs_info;
int again;
struct btrfs_trans_handle *trans;
do {
while (1) {
again = 0;
/* Make the cleaner go to sleep early. */
......@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(fs_info);
sleep:
if (kthread_should_park())
kthread_parkme();
if (kthread_should_stop())
return 0;
if (!again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
schedule();
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
/*
* Transaction kthread is stopped before us and wakes us up.
* However we might have started a new transaction and COWed some
* tree blocks when deleting unused block groups for example. So
* make sure we commit the transaction we started to have a clean
* shutdown when evicting the btree inode - if it has dirty pages
* when we do the final iput() on it, eviction will trigger a
* writeback for it which will fail with null pointer dereferences
* since work queues and other resources were already released and
* destroyed by the time the iput/eviction/writeback is made.
*/
trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT)
btrfs_err(fs_info,
"cleaner transaction attach returned %ld",
PTR_ERR(trans));
} else {
int ret;
ret = btrfs_commit_transaction(trans);
if (ret)
btrfs_err(fs_info,
"cleaner open transaction commit returned %d",
ret);
}
return 0;
}
static int transaction_kthread(void *arg)
......@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
/*
* We don't want the cleaner to start new transactions, add more delayed
* iputs, etc. while we're closing. We can't use kthread_stop() yet
* because that frees the task_struct, and the transaction kthread might
* still try to wake up the cleaner.
*/
kthread_park(fs_info->cleaner_kthread);
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info, false);
......@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
if (!sb_rdonly(fs_info->sb)) {
/*
* If the cleaner thread is stopped and there are
* block groups queued for removal, the deletion will be
* skipped when we quit the cleaner thread.
* The cleaner kthread is stopped, so do one final pass over
* unused block groups.
*/
btrfs_delete_unused_bgs(fs_info);
......@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
unpin = pinned_extents;
again:
while (1) {
/*
* The btrfs_finish_extent_commit() may get the same range as
* ours between find_first_extent_bit and clear_extent_dirty.
* Hence, hold the unused_bg_unpin_mutex to avoid double unpin
* the same extent range.
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
if (ret)
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
clear_extent_dirty(unpin, start, end);
btrfs_error_unpin_extent_range(fs_info, start, end);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
}
......
......@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
inode = btrfs_iget(fs_info->sb, &location, root, NULL);
inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
return inode;
......@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
path->search_commit_root = 1;
path->skip_locking = 1;
/*
* We must pass a path with search_commit_root set to btrfs_iget in
* order to avoid a deadlock when allocating extents for the tree root.
*
* When we are COWing an extent buffer from the tree root, when looking
* for a free extent, at extent-tree.c:find_free_extent(), we can find
* block group without its free space cache loaded. When we find one
* we must load its space cache which requires reading its free space
* cache's inode item from the root tree. If this inode item is located
* in the same leaf that we started COWing before, then we end up in
* deadlock on the extent buffer (trying to read lock it when we
* previously write locked it).
*
* It's safe to read the inode item using the commit root because
* block groups, once loaded, stay in memory forever (until they are
* removed) as well as their space caches once loaded. New block groups
* once created get their ->cached field set to BTRFS_CACHE_FINISHED so
* we will never try to read their inode item while the fs is mounted.
*/
inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
......
......@@ -1531,12 +1531,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
}
btrfs_release_path(path);
if (cur_offset <= end && cow_start == (u64)-1) {
if (cur_offset <= end && cow_start == (u64)-1)
cow_start = cur_offset;
cur_offset = end;
}
if (cow_start != (u64)-1) {
cur_offset = end;
ret = cow_file_range(inode, locked_page, cow_start, end, end,
page_started, nr_written, 1, NULL);
if (ret)
......@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/*
* read an inode from the btree into the in-memory inode
*/
static int btrfs_read_locked_inode(struct inode *inode)
static int btrfs_read_locked_inode(struct inode *inode,
struct btrfs_path *in_path)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_path *path;
struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_root *root = BTRFS_I(inode)->root;
......@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
if (!ret)
filled = true;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (!path) {
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
}
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
btrfs_free_path(path);
if (path != in_path)
btrfs_free_path(path);
return ret;
}
......@@ -3722,7 +3725,8 @@ static int btrfs_read_locked_inode(struct inode *inode)
btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
btrfs_free_path(path);
if (path != in_path)
btrfs_free_path(path);
if (!maybe_acls)
cache_no_acl(inode);
......@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
/* Get an inode object given its location and corresponding root.
* Returns in *is_new if the inode was read from disk
*/
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new)
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new,
struct btrfs_path *path)
{
struct inode *inode;
......@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
if (inode->i_state & I_NEW) {
int ret;
ret = btrfs_read_locked_inode(inode);
ret = btrfs_read_locked_inode(inode, path);
if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
......@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
return inode;
}
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new)
{
return btrfs_iget_path(s, location, root, new, NULL);
}
static struct inode *new_simple_dir(struct super_block *s,
struct btrfs_key *key,
struct btrfs_root *root)
......
......@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
len = round_down(i_size_read(src), sz) - loff;
if (len == 0)
return 0;
olen = len;
}
}
......@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
/* if we extend to eof, continue to block boundary */
if (off + len == src->i_size)
/*
* If we extend to eof, continue to block boundary if and only if the
* destination end offset matches the destination file's size, otherwise
* we would be corrupting data by placing the eof block into the middle
* of a file.
*/
if (off + len == src->i_size) {
if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
goto out_unlock;
len = ALIGN(src->i_size, bs) - off;
}
if (len == 0) {
ret = 0;
......
......@@ -1916,7 +1916,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
/* Used to sort the devices by max_avail(descending sort) */
static int btrfs_cmp_device_free_bytes(const void *dev_info1,
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
const void *dev_info2)
{
if (((struct btrfs_device_info *)dev_info1)->max_avail >
......@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
* The helper to calc the free space on the devices that can be used to store
* file data.
*/
static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 *free_bytes)
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 *free_bytes)
{
struct btrfs_device_info *devices_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
......
......@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
type != (BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA)) {
block_group_err(fs_info, leaf, slot,
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
type, hweight64(type),
BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_SYSTEM,
......
......@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
logged_end = end;
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
/*
* Skip extents outside our logging range. It's important to do
* it for correctness because if we don't ignore them, we may
* log them before their ordered extent completes, and therefore
* we could log them without logging their respective checksums
* (the checksum items are added to the csum tree at the very
* end of btrfs_finish_ordered_io()). Also leave such extents
* outside of our range in the list, since we may have another
* ranged fsync in the near future that needs them. If an extent
* outside our range corresponds to a hole, log it to avoid
* leaving gaps between extents (fsck will complain when we are
* not using the NO_HOLES feature).
*/
if ((em->start > end || em->start + em->len <= start) &&
em->block_start != EXTENT_MAP_HOLE)
continue;
list_del_init(&em->list);
/*
* Just an arbitrary number, this can be really CPU intensive
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment