/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>

static const struct file_operations fuse_direct_io_file_operations;

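/*
 * Ask the userspace server to open the file: O_CREAT, O_EXCL and
 * O_NOCTTY are always handled by the kernel and masked out, and O_TRUNC
 * is only forwarded when the server supports atomic O_TRUNC.
 */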
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	args.in.h.opcode = opcode;
	args.in.h.nodeid = nodeid;
	args.in.numargs = 1;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.out.numargs = 1;
	args.out.args[0].size = sizeof(*outargp);
	args.out.args[0].value = outargp;

	return fuse_simple_request(fc, &args);
}

struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
	struct fuse_file *ff;

	ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
	if (unlikely(!ff))
		return NULL;

	ff->fc = fc;
	ff->reserved_req = fuse_request_alloc(0);
	if (unlikely(!ff->reserved_req)) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	mutex_init(&ff->readdir.lock);
	refcount_set(&ff->count, 1);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	spin_lock(&fc->lock);
	ff->kh = ++fc->khctr;
	spin_unlock(&fc->lock);
	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	fuse_request_free(ff->reserved_req);
	mutex_destroy(&ff->readdir.lock);
	kfree(ff);
}

static struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	refcount_inc(&ff->count);
	return ff;
}

static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
	iput(req->misc.release.inode);
}

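/*
 * Drop a reference to the fuse_file.  On the final put the RELEASE
 * request is dropped (server without 'open'), sent synchronously, or
 * queued in the background, depending on the caller.
 */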
static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
	if (refcount_dec_and_test(&ff->count)) {
		struct fuse_req *req = ff->reserved_req;

		if (ff->fc->no_open && !isdir) {
			/*
			 * Drop the release request when client does not
			 * implement 'open'
			 */
			__clear_bit(FR_BACKGROUND, &req->flags);
			iput(req->misc.release.inode);
			fuse_put_request(ff->fc, req);
		} else if (sync) {
			__set_bit(FR_FORCE, &req->flags);
			__clear_bit(FR_BACKGROUND, &req->flags);
			fuse_request_send(ff->fc, req);
			iput(req->misc.release.inode);
			fuse_put_request(ff->fc, req);
		} else {
			req->end = fuse_release_end;
			__set_bit(FR_BACKGROUND, &req->flags);
			fuse_request_send_background(ff->fc, req);
		}
		kfree(ff);
	}
}

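/*
 * Open a file or directory.  A server may answer OPEN with -ENOSYS, in
 * which case opens succeed locally from then on; OPENDIR has no such
 * fallback.
 */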
120
121
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir)
122
123
124
125
126
127
128
129
{
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

	ff = fuse_file_alloc(fc);
	if (!ff)
		return -ENOMEM;

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
	ff->fh = 0;
	ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
	if (!fc->no_open || isdir) {
		struct fuse_open_out outarg;
		int err;

		err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
		if (!err) {
			ff->fh = outarg.fh;
			ff->open_flags = outarg.open_flags;

		} else if (err != -ENOSYS || isdir) {
			fuse_file_free(ff);
			return err;
		} else {
			fc->no_open = 1;
		}
147
148
149
	}

	if (isdir)
150
		ff->open_flags &= ~FOPEN_DIRECT_IO;
151
152

	ff->nodeid = nodeid;
153
	file->private_data = ff;
154
155
156

	return 0;
}
157
EXPORT_SYMBOL_GPL(fuse_do_open);
158

static void fuse_link_write_file(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff = file->private_data;
	/*
	 * file may be written through mmap, so chain it onto the
	 * inode's write_files list
	 */
	spin_lock(&fc->lock);
	if (list_empty(&ff->write_entry))
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fc->lock);
}

void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_DIRECT_IO)
		file->f_op = &fuse_direct_io_file_operations;
	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, 0);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
		if (fc->writeback_cache)
			file_update_time(file);
	}
	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;
	bool lock_inode = (file->f_flags & O_TRUNC) &&
			  fc->atomic_o_trunc &&
			  fc->writeback_cache;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	if (lock_inode)
		inode_lock(inode);

	err = fuse_do_open(fc, get_node_id(inode), file, isdir);

	if (!err)
		fuse_finish_open(inode, file);

	if (lock_inode)
		inode_unlock(inode);

	return err;
}

static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
	struct fuse_conn *fc = ff->fc;
	struct fuse_req *req = ff->reserved_req;
	struct fuse_release_in *inarg = &req->misc.release.in;

	spin_lock(&fc->lock);
	list_del(&ff->write_entry);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_all(&ff->poll_wait);

	inarg->fh = ff->fh;
	inarg->flags = flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_release_in);
	req->in.args[0].value = inarg;
}

void fuse_release_common(struct file *file, bool isdir)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req = ff->reserved_req;
	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;

	fuse_prepare_release(ff, file->f_flags, opcode);

	if (ff->flock) {
		struct fuse_release_in *inarg = &req->misc.release.in;
		inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
						       (fl_owner_t) file);
	}
	/* Hold inode until release is finished */
	req->misc.release.inode = igrab(file_inode(file));

	/*
	 * Normally this will send the RELEASE request, however if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount,
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see fuse_vma_close() for !writeback_cache case */
	if (fc->writeback_cache)
		write_inode_now(inode, 1);

	fuse_release_common(file, false);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_file *ff, int flags)
{
	WARN_ON(refcount_read(&ff->count) > 1);
	fuse_prepare_release(ff, flags, FUSE_RELEASE);
	/*
	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
	 * synchronous, we are fine with not doing igrab() here.
	 */
	fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

/*
 * Check if any page in a range is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
				   pgoff_t idx_to)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	bool found = false;

	spin_lock(&fc->lock);
	list_for_each_entry(req, &fi->writepages, writepages_entry) {
		pgoff_t curr_index;

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
		if (idx_from < curr_index + req->num_pages &&
		    curr_index <= idx_to) {
			found = true;
			break;
		}
	}
	spin_unlock(&fc->lock);

	return found;
}

static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	return fuse_range_is_writeback(inode, index, index);
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
	return 0;
}

/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

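/*
 * Called on every close(2) of the file: write back dirty data and wait
 * for in-flight writepages before sending FUSE_FLUSH, so the server
 * observes the flush only after the preceding writes.
 */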
static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_flush_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_flush)
		return 0;

	err = write_inode_now(inode, 1);
	if (err)
		return err;

	inode_lock(inode);
	fuse_sync_writes(inode);
	inode_unlock(inode);

	err = filemap_check_errors(file->f_mapping);
	if (err)
		return err;

	req = fuse_get_req_nofail_nopages(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fc, id);
	req->in.h.opcode = FUSE_FLUSH;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	__set_bit(FR_FORCE, &req->flags);
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		fc->no_flush = 1;
		err = 0;
	}
	return err;
}

int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int opcode)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	FUSE_ARGS(args);
	struct fuse_fsync_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? 1 : 0;
	args.in.h.opcode = opcode;
	args.in.h.nodeid = get_node_id(inode);
	args.in.numargs = 1;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	return fuse_simple_request(fc, &args);
}

static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	inode_lock(inode);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = file_write_and_wait_range(file, start, end);
	if (err)
		goto out;

	fuse_sync_writes(inode);

	/*
	 * Due to the implementation of fuse writeback,
	 * file_write_and_wait_range() does not catch errors.
	 * We have to do this directly after fuse_sync_writes()
	 */
	err = file_check_and_advance_wb_err(file);
	if (err)
		goto out;

	err = sync_inode_metadata(inode, 1);
	if (err)
		goto out;

	if (fc->no_fsync)
		goto out;

	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
	if (err == -ENOSYS) {
		fc->no_fsync = 1;
		err = 0;
	}
out:
	inode_unlock(inode);

	return err;
}

void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
		    size_t count, int opcode)
{
	struct fuse_read_in *inarg = &req->misc.read.in;
	struct fuse_file *ff = file->private_data;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	inarg->flags = file->f_flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_read_in);
	req->in.args[0].value = inarg;
	req->out.argvar = 1;
	req->out.numargs = 1;
	req->out.args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
{
	unsigned i;

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (should_dirty)
			set_page_dirty_lock(page);
		put_page(page);
	}
}

static void fuse_io_release(struct kref *kref)
{
	kfree(container_of(kref, struct fuse_io_priv, refcnt));
}

static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
{
	if (io->err)
		return io->err;

	if (io->bytes >= 0 && io->write)
		return -EIO;

	return io->bytes < 0 ? io->size : io->bytes;
}

/**
 * In case of short read, the caller sets 'pos' to the position of
 * actual end of fuse request in IO request. Otherwise, if bytes_requested
 * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
 *
 * An example:
 * User requested DIO read of 64K. It was split into two 32K fuse requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ? : err;
	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
		io->bytes = pos;

	left = --io->reqs;
	if (!left && io->blocking)
		complete(io->done);
	spin_unlock(&io->lock);

	if (!left && !io->blocking) {
		ssize_t res = fuse_get_res_by_io(io);

		if (res >= 0) {
			struct inode *inode = file_inode(io->iocb->ki_filp);
			struct fuse_conn *fc = get_fuse_conn(inode);
			struct fuse_inode *fi = get_fuse_inode(inode);

			spin_lock(&fc->lock);
			fi->attr_version = ++fc->attr_version;
			spin_unlock(&fc->lock);
		}

		io->iocb->ki_complete(io->iocb, res, 0);
	}

	kref_put(&io->refcnt, fuse_io_release);
}

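/*
 * Completion callback for async requests: translate a short transfer
 * into the 'pos' convention described above and fold the result into
 * the containing fuse_io_priv.
 */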
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_io_priv *io = req->io;
	ssize_t pos = -1;

	fuse_release_user_pages(req, io->should_dirty);

	if (io->write) {
		if (req->misc.write.in.size != req->misc.write.out.size)
			pos = req->misc.write.in.offset - io->offset +
				req->misc.write.out.size;
	} else {
		if (req->misc.read.in.size != req->out.args[0].size)
			pos = req->misc.read.in.offset - io->offset +
				req->out.args[0].size;
	}

	fuse_aio_complete(io, req->out.h.error, pos);
}

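/*
 * Account num_bytes against the io request and submit in the
 * background; the result is reported through fuse_aio_complete_req().
 */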
static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
		size_t num_bytes, struct fuse_io_priv *io)
{
	spin_lock(&io->lock);
	kref_get(&io->refcnt);
	io->size += num_bytes;
	io->reqs++;
	spin_unlock(&io->lock);

	req->io = io;
	req->end = fuse_aio_complete_req;

	__fuse_get_request(req);
	fuse_request_send_background(fc, req);

	return num_bytes;
}

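/*
 * Send a READ request, either synchronously (returning the number of
 * bytes read) or via the async path when io->async is set.
 */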
static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
			     loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;

	fuse_read_fill(req, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		struct fuse_read_in *inarg = &req->misc.read.in;

		inarg->read_flags |= FUSE_READ_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->out.args[0].size;
}

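/*
 * A short read implies EOF: shrink i_size accordingly, but only if the
 * attributes have not changed since the read was issued.
 */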
static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size &&
	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, size);
	}
	spin_unlock(&fc->lock);
}

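/*
 * Handle a short READ reply: with writeback cache the remainder of the
 * pages is zero-filled (later data may still be dirty locally),
 * otherwise the file size is trimmed.
 */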
static void fuse_short_read(struct fuse_req *req, struct inode *inode,
			    u64 attr_ver)
{
	size_t num_read = req->out.args[0].size;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fc->writeback_cache) {
		/*
		 * A hole in a file. Some data after the hole are in page cache,
		 * but have not reached the client fs yet. So, the hole is not
		 * present there.
		 */
		int i;
		int start_idx = num_read >> PAGE_SHIFT;
		size_t off = num_read & (PAGE_SIZE - 1);

		for (i = start_idx; i < req->num_pages; i++) {
			zero_user_segment(req->pages[i], off, PAGE_SIZE);
			off = 0;
		}
	} else {
		loff_t pos = page_offset(req->pages[0]) + num_read;
		fuse_read_update_size(inode, pos, attr_ver);
	}
}

static int fuse_do_readpage(struct file *file, struct page *page)
{
	struct kiocb iocb;
	struct fuse_io_priv io;
	struct inode *inode = page->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	size_t num_read;
	loff_t pos = page_offset(page);
	size_t count = PAGE_SIZE;
	u64 attr_ver;
	int err;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	attr_ver = fuse_get_attr_version(fc);

	req->out.page_zeroing = 1;
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = count;
	init_sync_kiocb(&iocb, file);
	io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	err = req->out.h.error;

	if (!err) {
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (num_read < count)
			fuse_short_read(req, inode, attr_ver);

		SetPageUptodate(page);
	}

	fuse_put_request(fc, req);

	return err;
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	err = fuse_do_readpage(file, page);
	fuse_invalidate_atime(inode);
 out:
	unlock_page(page);
	return err;
}

static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	size_t count = req->misc.read.in.size;
	size_t num_read = req->out.args[0].size;
	struct address_space *mapping = NULL;

	for (i = 0; mapping == NULL && i < req->num_pages; i++)
		mapping = req->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF. If file size is larger, truncate it
		 */
		if (!req->out.h.error && num_read < count)
			fuse_short_read(req, inode, req->misc.read.attr_ver);

		fuse_invalidate_atime(inode);
	}

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (!req->out.h.error)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		put_page(page);
	}
	if (req->ff)
		fuse_file_put(req->ff, false, false);
}

static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	loff_t pos = page_offset(req->pages[0]);
	size_t count = req->num_pages << PAGE_SHIFT;

	req->out.argpages = 1;
	req->out.page_zeroing = 1;
	req->out.page_replace = 1;
	fuse_read_fill(req, file, pos, count, FUSE_READ);
	req->misc.read.attr_ver = fuse_get_attr_version(fc);
	if (fc->async_read) {
		req->ff = fuse_file_get(ff);
		req->end = fuse_readpages_end;
		fuse_request_send_background(fc, req);
	} else {
		fuse_request_send(fc, req);
		fuse_readpages_end(fc, req);
		fuse_put_request(fc, req);
	}
}

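/* State shared by fuse_readpages_fill() while batching contiguous pages. */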
struct fuse_fill_data {
	struct fuse_req *req;
	struct file *file;
	struct inode *inode;
	unsigned nr_pages;
};

static int fuse_readpages_fill(void *_data, struct page *page)
{
	struct fuse_fill_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	fuse_wait_on_page_writeback(inode, page->index);

	if (req->num_pages &&
	    (req->num_pages == fc->max_pages ||
	     (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
		unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
					      fc->max_pages);
		fuse_send_readpages(req, data->file);
		if (fc->async_read)
			req = fuse_get_req_for_background(fc, nr_alloc);
		else
			req = fuse_get_req(fc, nr_alloc);

		data->req = req;
		if (IS_ERR(req)) {
			unlock_page(page);
			return PTR_ERR(req);
		}
	}

	if (WARN_ON(req->num_pages >= req->max_pages)) {
		unlock_page(page);
		fuse_put_request(fc, req);
		return -EIO;
	}

	get_page(page);
	req->pages[req->num_pages] = page;
	req->page_descs[req->num_pages].length = PAGE_SIZE;
	req->num_pages++;
	data->nr_pages--;
	return 0;
}

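/*
 * ->readpages() implementation: read_cache_pages() feeds each page to
 * fuse_readpages_fill(), which batches contiguous pages into READ
 * requests.
 */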
static int fuse_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_data data;
	int err;
	unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.file = file;
	data.inode = inode;
	if (fc->async_read)
		data.req = fuse_get_req_for_background(fc, nr_alloc);
	else
		data.req = fuse_get_req(fc, nr_alloc);
	data.nr_pages = nr_pages;
	err = PTR_ERR(data.req);
	if (IS_ERR(data.req))
		goto out;

	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
	if (!err) {
		if (data.req->num_pages)
			fuse_send_readpages(data.req, file);
		else
			fuse_put_request(fc, data.req);
	}
out:
	return err;
}

static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/*
	 * In auto invalidate mode, always update attributes on read.
	 * Otherwise, only update if we attempt to read past EOF (to ensure
	 * i_size is up to date).
	 */
	if (fc->auto_inval_data ||
	    (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
		int err;
		err = fuse_update_attributes(inode, iocb->ki_filp);
		if (err)
			return err;
	}

	return generic_file_read_iter(iocb, to);
}

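/*
 * Fill in the common part of a WRITE request; servers older than
 * protocol minor version 9 take a shorter fuse_write_in.
 */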
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
			    loff_t pos, size_t count)
{
	struct fuse_write_in *inarg = &req->misc.write.in;
	struct fuse_write_out *outarg = &req->misc.write.out;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	req->in.h.opcode = FUSE_WRITE;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 2;
	if (ff->fc->minor < 9)
		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		req->in.args[0].size = sizeof(struct fuse_write_in);
	req->in.args[0].value = inarg;
	req->in.args[1].size = count;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(struct fuse_write_out);
	req->out.args[0].value = outarg;
}

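/*
 * Send a WRITE request.  O_DSYNC/O_SYNC are re-derived from the iocb so
 * that per-I/O sync flags reach the server as well.
 */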
static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
			      loff_t pos, size_t count, fl_owner_t owner)
{
	struct kiocb *iocb = io->iocb;
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_write_in *inarg = &req->misc.write.in;

	fuse_write_fill(req, ff, pos, count);
	inarg->flags = file->f_flags;
	if (iocb->ki_flags & IOCB_DSYNC)
		inarg->flags |= O_DSYNC;
	if (iocb->ki_flags & IOCB_SYNC)
		inarg->flags |= O_SYNC;
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send