file.c 71.4 KB
Newer Older
1
2
/*
  FUSE: Filesystem in Userspace
Miklos Szeredi's avatar
Miklos Szeredi committed
3
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5
6
7
8
9
10
11
12
13

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
Alexey Dobriyan's avatar
Alexey Dobriyan committed
14
#include <linux/sched.h>
15
#include <linux/module.h>
16
#include <linux/compat.h>
17
#include <linux/swap.h>
18
#include <linux/aio.h>
19
#include <linux/falloc.h>
20

21
static const struct file_operations fuse_direct_io_file_operations;
22

23
24
/*
 * Send a FUSE_OPEN/FUSE_OPENDIR request for @nodeid and return the
 * server's reply in @outargp.  Returns 0 or a negative errno.
 */
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	struct fuse_req *req;
	int err;

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	memset(&inarg, 0, sizeof(inarg));
	/* Creation flags were already handled by the VFS open path */
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outargp);
	req->out.args[0].value = outargp;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	return err;
}

Tejun Heo's avatar
Tejun Heo committed
53
/*
 * Allocate a fuse_file together with its pre-allocated release request.
 * Returns NULL on allocation failure.
 */
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
	struct fuse_file *ff;

	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
	if (unlikely(!ff))
		return NULL;

	ff->fc = fc;
	/* Reserve the request used later for RELEASE, so it cannot fail */
	ff->reserved_req = fuse_request_alloc(0);
	if (unlikely(!ff->reserved_req)) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	atomic_set(&ff->count, 0);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	/* Assign a unique kernel handle under the connection lock */
	spin_lock(&fc->lock);
	ff->kh = ++fc->khctr;
	spin_unlock(&fc->lock);

	return ff;
}

/* Free a fuse_file and its reserved request (counterpart of fuse_file_alloc) */
void fuse_file_free(struct fuse_file *ff)
{
	fuse_request_free(ff->reserved_req);
	kfree(ff);
}

86
/* Take a reference on @ff; paired with fuse_file_put() */
struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	atomic_inc(&ff->count);
	return ff;
}

92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
 * Deferred part of release: drop the request and the held path from a
 * workqueue context (see fuse_release_end for why this must be async).
 */
static void fuse_release_async(struct work_struct *work)
{
	struct fuse_req *req;
	struct fuse_conn *fc;
	struct path path;

	req = container_of(work, struct fuse_req, misc.release.work);
	path = req->misc.release.path;
	fc = get_fuse_conn(path.dentry->d_inode);

	fuse_put_request(fc, req);
	path_put(&path);
}

Miklos Szeredi's avatar
Miklos Szeredi committed
106
107
/*
 * Completion callback for a background RELEASE request: drop the path
 * reference, deferring to a workqueue on fuseblk mounts.
 */
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
	if (fc->destroy_req) {
		/*
		 * If this is a fuseblk mount, then it's possible that
		 * releasing the path will result in releasing the
		 * super block and sending the DESTROY request.  If
		 * the server is single threaded, this would hang.
		 * For this reason do the path_put() in a separate
		 * thread.
		 */
		atomic_inc(&req->count);
		INIT_WORK(&req->misc.release.work, fuse_release_async);
		schedule_work(&req->misc.release.work);
	} else {
		path_put(&req->misc.release.path);
	}
}

125
/*
 * Drop a reference on @ff.  On the last reference, send the reserved
 * RELEASE request -- synchronously if @sync, otherwise in the background
 * (or drop it entirely when the server never implemented open).
 */
static void fuse_file_put(struct fuse_file *ff, bool sync)
{
	if (atomic_dec_and_test(&ff->count)) {
		struct fuse_req *req = ff->reserved_req;

		if (ff->fc->no_open) {
			/*
			 * Drop the release request when client does not
			 * implement 'open'
			 */
			req->background = 0;
			path_put(&req->misc.release.path);
			fuse_put_request(ff->fc, req);
		} else if (sync) {
			req->background = 0;
			fuse_request_send(ff->fc, req);
			path_put(&req->misc.release.path);
			fuse_put_request(ff->fc, req);
		} else {
			/* Background send: fuse_release_end drops the path */
			req->end = fuse_release_end;
			req->background = 1;
			fuse_request_send_background(ff->fc, req);
		}
		kfree(ff);
	}
}

152
153
/*
 * Open @nodeid on behalf of @file: allocate a fuse_file, send OPEN (or
 * OPENDIR) to the server, and install the result in file->private_data.
 * An -ENOSYS reply to OPEN (not OPENDIR) is remembered and treated as
 * "server has no open method".  Returns 0 or a negative errno.
 */
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

	ff = fuse_file_alloc(fc);
	if (!ff)
		return -ENOMEM;

	ff->fh = 0;
	ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
	if (!fc->no_open || isdir) {
		struct fuse_open_out outarg;
		int err;

		err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
		if (!err) {
			ff->fh = outarg.fh;
			ff->open_flags = outarg.open_flags;

		} else if (err != -ENOSYS || isdir) {
			fuse_file_free(ff);
			return err;
		} else {
			/* First -ENOSYS from OPEN: remember for next time */
			fc->no_open = 1;
		}
	}

	if (isdir)
		ff->open_flags &= ~FOPEN_DIRECT_IO;

	ff->nodeid = nodeid;
	file->private_data = fuse_file_get(ff);

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);
190

191
/*
 * Apply the server's open_flags to the freshly opened file: switch to
 * direct-IO ops, invalidate or keep the page cache, mark non-seekable,
 * and honour atomic O_TRUNC by zeroing i_size under fc->lock.
 */
void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_DIRECT_IO)
		file->f_op = &fuse_direct_io_file_operations;
	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Truncation done by the server; reflect it locally */
		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, 0);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
	}
}

213
/*
 * Common open path for files and directories: VFS-level checks, then
 * the FUSE open handshake, then post-open fixups.
 */
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
	if (err)
		return err;

	fuse_finish_open(inode, file);

	return 0;
}

231
/*
 * Detach @ff from per-connection bookkeeping (write list, poll tree) and
 * fill its reserved request as a RELEASE/RELEASEDIR, ready to be sent.
 */
static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
	struct fuse_conn *fc = ff->fc;
	struct fuse_req *req = ff->reserved_req;
	struct fuse_release_in *inarg = &req->misc.release.in;

	spin_lock(&fc->lock);
	list_del(&ff->write_entry);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	/* Nobody should poll this file any more */
	wake_up_interruptible_all(&ff->poll_wait);

	inarg->fh = ff->fh;
	inarg->flags = flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_release_in);
	req->in.args[0].value = inarg;
}

254
/*
 * Common ->release() path for files and dirs: prepare the RELEASE
 * request, add flock-unlock info if needed, pin the path for the
 * duration, and drop the last fuse_file reference (which sends it).
 */
void fuse_release_common(struct file *file, int opcode)
{
	struct fuse_file *ff;
	struct fuse_req *req;

	ff = file->private_data;
	if (unlikely(!ff))
		return;

	req = ff->reserved_req;
	fuse_prepare_release(ff, file->f_flags, opcode);

	if (ff->flock) {
		struct fuse_release_in *inarg = &req->misc.release.in;
		inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
						       (fl_owner_t) file);
	}
	/* Hold vfsmount and dentry until release is finished */
	path_get(&file->f_path);
	req->misc.release.path = file->f_path;

	/*
	 * Normally this will send the RELEASE request, however if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount,
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fc->destroy_req != NULL);
}

288
289
/* ->open() for regular files */
static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

/* ->release() for regular files */
static int fuse_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, FUSE_RELEASE);

	/* return value is ignored by VFS */
	return 0;
}

/*
 * Synchronously release a fuse_file that is not reachable by any other
 * thread (caller must hold the only reference).  Forces the RELEASE
 * request so it cannot be aborted.
 */
void fuse_sync_release(struct fuse_file *ff, int flags)
{
	WARN_ON(atomic_read(&ff->count) > 1);
	fuse_prepare_release(ff, flags, FUSE_RELEASE);
	ff->reserved_req->force = 1;
	ff->reserved_req->background = 0;
	fuse_request_send(ff->fc, ff->reserved_req);
	fuse_put_request(ff->fc, ff->reserved_req);
	kfree(ff);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
312

313
/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	/* 32 rounds of XTEA over the two 32-bit halves of the pointer */
	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

Miklos Szeredi's avatar
Miklos Szeredi committed
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
/*
 * Check if page is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	bool found = false;

	spin_lock(&fc->lock);
	list_for_each_entry(req, &fi->writepages, writepages_entry) {
		pgoff_t curr_index;

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		/* Does [curr_index, curr_index + num_pages) cover @index? */
		if (curr_index <= index &&
		    index < curr_index + req->num_pages) {
			found = true;
			break;
		}
	}
	spin_unlock(&fc->lock);

	return found;
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
	return 0;
}

379
/*
 * ->flush() handler: send a forced FUSE_FLUSH carrying the scrambled
 * lock owner.  An -ENOSYS reply disables FLUSH for this connection.
 */
static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_flush_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_flush)
		return 0;

	/* nofail: flush must not be skipped due to allocation failure */
	req = fuse_get_req_nofail_nopages(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fc, id);
	req->in.h.opcode = FUSE_FLUSH;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->force = 1;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		fc->no_flush = 1;
		err = 0;
	}
	return err;
}

Miklos Szeredi's avatar
Miklos Szeredi committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

429
430
/*
 * Common fsync path for files (FUSE_FSYNC) and directories
 * (FUSE_FSYNCDIR): flush dirty pages, wait for in-flight writes, then
 * send the fsync request.  -ENOSYS disables the opcode per connection.
 */
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int isdir)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_fsync_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (err)
		return err;

	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
		return 0;

	mutex_lock(&inode->i_mutex);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = write_inode_now(inode, 0);
	if (err)
		goto out;

	fuse_sync_writes(inode);

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? 1 : 0;
	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		if (isdir)
			fc->no_fsyncdir = 1;
		else
			fc->no_fsync = 1;
		err = 0;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return err;
}

491
492
/* ->fsync() for regular files */
static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	return fuse_fsync_common(file, start, end, datasync, 0);
}

497
498
/*
 * Fill @req as a read-style request (@opcode is FUSE_READ or similar)
 * for @count bytes at @pos.  Output size is variable (argvar) since the
 * server may return a short read.
 */
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
		    size_t count, int opcode)
{
	struct fuse_read_in *inarg = &req->misc.read.in;
	struct fuse_file *ff = file->private_data;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	inarg->flags = file->f_flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_read_in);
	req->in.args[0].value = inarg;
	/* argvar: reply may be shorter than the requested count */
	req->out.argvar = 1;
	req->out.numargs = 1;
	req->out.args[0].size = count;
}

517
518
519
520
521
522
523
524
525
526
527
528
/*
 * Unpin every user page attached to @req; pages that were written to
 * (@write non-zero) are marked dirty first.
 */
static void fuse_release_user_pages(struct fuse_req *req, int write)
{
	unsigned idx;

	for (idx = 0; idx < req->num_pages; idx++) {
		struct page *pg = req->pages[idx];

		if (write)
			set_page_dirty_lock(pg);
		put_page(pg);
	}
}

Maxim Patlasov's avatar
Maxim Patlasov committed
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
/**
 * In case of short read, the caller sets 'pos' to the position of
 * actual end of fuse request in IO request. Otherwise, if bytes_requested
 * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
 *
 * An example:
 * User requested DIO read of 64K. It was splitted into two 32K fuse requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ? : err;	/* keep the first error */
	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
		io->bytes = pos;		/* track minimal short-IO end */

	left = --io->reqs;
	spin_unlock(&io->lock);

	/* Last outstanding sub-request: complete the whole aio */
	if (!left) {
		long res;

		if (io->err)
			res = io->err;
		else if (io->bytes >= 0 && io->write)
			res = -EIO;		/* short write is an error */
		else {
			res = io->bytes < 0 ? io->size : io->bytes;

			if (!is_sync_kiocb(io->iocb)) {
				struct inode *inode = file_inode(io->iocb->ki_filp);
				struct fuse_conn *fc = get_fuse_conn(inode);
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fc->lock);
				fi->attr_version = ++fc->attr_version;
				spin_unlock(&fc->lock);
			}
		}

		aio_complete(io->iocb, res, 0);
		kfree(io);
	}
}

/*
 * Per-request completion for async direct IO: release pinned pages and
 * report the short-transfer position (or -1 for a full transfer) to
 * fuse_aio_complete().
 */
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_io_priv *io = req->io;
	ssize_t pos = -1;

	fuse_release_user_pages(req, !io->write);

	if (io->write) {
		if (req->misc.write.in.size != req->misc.write.out.size)
			pos = req->misc.write.in.offset - io->offset +
				req->misc.write.out.size;
	} else {
		if (req->misc.read.in.size != req->out.args[0].size)
			pos = req->misc.read.in.offset - io->offset +
				req->out.args[0].size;
	}

	fuse_aio_complete(io, req->out.h.error, pos);
}

/*
 * Account @num_bytes against @io and queue @req in the background; the
 * extra request reference keeps it alive for the caller after send.
 */
static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
		size_t num_bytes, struct fuse_io_priv *io)
{
	spin_lock(&io->lock);
	io->size += num_bytes;
	io->reqs++;
	spin_unlock(&io->lock);

	req->io = io;
	req->end = fuse_aio_complete_req;

	__fuse_get_request(req);
	fuse_request_send_background(fc, req);

	return num_bytes;
}

621
/*
 * Send a FUSE_READ for @count bytes at @pos, optionally tagged with a
 * lock owner.  Async IO returns the submitted byte count; sync IO
 * returns the number of bytes the server actually produced.
 */
static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
			     loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;

	fuse_read_fill(req, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		struct fuse_read_in *inarg = &req->misc.read.in;

		inarg->read_flags |= FUSE_READ_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->out.args[0].size;
}

643
644
645
646
647
648
649
/*
 * Shrink i_size after a short read (EOF seen by the server), but only
 * if attributes haven't changed since (@attr_ver) and no truncate is in
 * progress (FUSE_I_SIZE_UNSTABLE).
 */
static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size &&
	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, size);
	}
	spin_unlock(&fc->lock);
}

658
659
/*
 * ->readpage(): synchronously read one page-cache page from the server.
 * A short read is interpreted as EOF and shrinks i_size accordingly.
 * The page is unlocked on all paths, as the VFS requires.
 */
static int fuse_readpage(struct file *file, struct page *page)
{
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct inode *inode = page->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	size_t num_read;
	loff_t pos = page_offset(page);
	size_t count = PAGE_CACHE_SIZE;
	u64 attr_ver;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc, 1);
	err = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	/* Snapshot attr_version before the read to detect races */
	attr_ver = fuse_get_attr_version(fc);

	req->out.page_zeroing = 1;
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = count;
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (!err) {
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (num_read < count)
			fuse_read_update_size(inode, pos + num_read, attr_ver);

		SetPageUptodate(page);
	}

	fuse_invalidate_atime(inode);
 out:
	unlock_page(page);
	return err;
}

713
/*
 * Completion for a multi-page readahead request: handle EOF-shortened
 * reads, mark each page up-to-date or errored, unlock and release them,
 * and drop the fuse_file reference taken for async sends.
 */
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	size_t count = req->misc.read.in.size;
	size_t num_read = req->out.args[0].size;
	struct address_space *mapping = NULL;

	/* Find the first page that still has a mapping */
	for (i = 0; mapping == NULL && i < req->num_pages; i++)
		mapping = req->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF. If file size is larger, truncate it
		 */
		if (!req->out.h.error && num_read < count) {
			loff_t pos;

			pos = page_offset(req->pages[0]) + num_read;
			fuse_read_update_size(inode, pos,
					      req->misc.read.attr_ver);
		}
		fuse_invalidate_atime(inode);
	}

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (!req->out.h.error)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		page_cache_release(page);
	}
	if (req->ff)
		fuse_file_put(req->ff, false);
}

752
/*
 * Send one batched readahead request.  With async_read the request is
 * queued in the background (holding a fuse_file ref for completion);
 * otherwise it is sent synchronously and completed inline.
 */
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	loff_t pos = page_offset(req->pages[0]);
	size_t count = req->num_pages << PAGE_CACHE_SHIFT;

	req->out.argpages = 1;
	req->out.page_zeroing = 1;
	req->out.page_replace = 1;
	fuse_read_fill(req, file, pos, count, FUSE_READ);
	req->misc.read.attr_ver = fuse_get_attr_version(fc);
	if (fc->async_read) {
		req->ff = fuse_file_get(ff);
		req->end = fuse_readpages_end;
		fuse_request_send_background(fc, req);
	} else {
		fuse_request_send(fc, req);
		fuse_readpages_end(fc, req);
		fuse_put_request(fc, req);
	}
}

775
/* Per-call state threaded through fuse_readpages_fill() */
struct fuse_fill_data {
	struct fuse_req *req;		/* request currently being filled */
	struct file *file;
	struct inode *inode;
	unsigned nr_pages;		/* pages still expected from readahead */
};

/*
 * read_cache_pages() callback: append @page to the request being built,
 * flushing the request and starting a new one whenever it is full,
 * would exceed max_read, or the page is not contiguous.
 */
static int fuse_readpages_fill(void *_data, struct page *page)
{
	struct fuse_fill_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	fuse_wait_on_page_writeback(inode, page->index);

	if (req->num_pages &&
	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
		int nr_alloc = min_t(unsigned, data->nr_pages,
				     FUSE_MAX_PAGES_PER_REQ);
		/* Current batch is done: send it and allocate the next */
		fuse_send_readpages(req, data->file);
		if (fc->async_read)
			req = fuse_get_req_for_background(fc, nr_alloc);
		else
			req = fuse_get_req(fc, nr_alloc);

		data->req = req;
		if (IS_ERR(req)) {
			unlock_page(page);
			return PTR_ERR(req);
		}
	}

	if (WARN_ON(req->num_pages >= req->max_pages)) {
		fuse_put_request(fc, req);
		return -EIO;
	}

	/* Extra page ref is dropped in fuse_readpages_end() */
	page_cache_get(page);
	req->pages[req->num_pages] = page;
	req->page_descs[req->num_pages].length = PAGE_SIZE;
	req->num_pages++;
	data->nr_pages--;
	return 0;
}

/*
 * ->readpages(): batch readahead pages into large FUSE_READ requests
 * via read_cache_pages()/fuse_readpages_fill(), sending any final
 * partially-filled request at the end.
 */
static int fuse_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_data data;
	int err;
	int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.file = file;
	data.inode = inode;
	if (fc->async_read)
		data.req = fuse_get_req_for_background(fc, nr_alloc);
	else
		data.req = fuse_get_req(fc, nr_alloc);
	data.nr_pages = nr_pages;
	err = PTR_ERR(data.req);
	if (IS_ERR(data.req))
		goto out;

	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
	if (!err) {
		/* Flush the last, possibly partial, batch */
		if (data.req->num_pages)
			fuse_send_readpages(data.req, file);
		else
			fuse_put_request(fc, data.req);
	}
out:
	return err;
}

Miklos Szeredi's avatar
Miklos Szeredi committed
858
859
860
861
/*
 * Cached aio read: refresh attributes when needed so that i_size-based
 * EOF handling in generic_file_aio_read() is accurate.
 */
static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/*
	 * In auto invalidate mode, always update attributes on read.
	 * Otherwise, only update if we attempt to read past EOF (to ensure
	 * i_size is up to date).
	 */
	if (fc->auto_inval_data ||
	    (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
		int err;
		err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
		if (err)
			return err;
	}

	return generic_file_aio_read(iocb, iov, nr_segs, pos);
}

880
/*
 * Fill @req as a FUSE_WRITE of @count bytes at @pos for file handle
 * @ff.  Uses the compact pre-7.9 header size for old servers.
 */
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
			    loff_t pos, size_t count)
{
	struct fuse_write_in *inarg = &req->misc.write.in;
	struct fuse_write_out *outarg = &req->misc.write.out;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	req->in.h.opcode = FUSE_WRITE;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 2;
	if (ff->fc->minor < 9)
		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		req->in.args[0].size = sizeof(struct fuse_write_in);
	req->in.args[0].value = inarg;
	req->in.args[1].size = count;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(struct fuse_write_out);
	req->out.args[0].value = outarg;
}

903
/*
 * Send a FUSE_WRITE of @count bytes at @pos, optionally tagged with a
 * lock owner.  Async IO returns the submitted byte count; sync IO
 * returns the number of bytes the server accepted.
 */
static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
			      loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_write_in *inarg = &req->misc.write.in;

	fuse_write_fill(req, ff, pos, count);
	inarg->flags = file->f_flags;
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->misc.write.out.size;
}

Miklos Szeredi's avatar
Miklos Szeredi committed
925
/*
 * After a successful write ending at @pos, extend i_size if the write
 * went past it; always bumps attr_version under fc->lock.
 */
void fuse_write_update_size(struct inode *inode, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	fi->attr_version = ++fc->attr_version;
	if (pos > inode->i_size)
		i_size_write(inode, pos);
	spin_unlock(&fc->lock);
}

Nick Piggin's avatar
Nick Piggin committed
937
938
939
940
941
942
943
/*
 * Synchronously write the pages gathered in @req, then walk them again:
 * mark fully-written whole pages up to date, and unlock/release every
 * page.  Returns the number of bytes the server accepted.
 */
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
				    struct inode *inode, loff_t pos,
				    size_t count)
{
	size_t res;
	unsigned offset;
	unsigned i;
	struct fuse_io_priv io = { .async = 0, .file = file };

	for (i = 0; i < req->num_pages; i++)
		fuse_wait_on_page_writeback(inode, req->pages[i]->index);

	res = fuse_send_write(req, &io, pos, count, NULL);

	offset = req->page_descs[0].offset;
	/* Re-walk pages, consuming 'count' to see which were fully written */
	count = res;
	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];

		if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
			SetPageUptodate(page);

		if (count > PAGE_CACHE_SIZE - offset)
			count -= PAGE_CACHE_SIZE - offset;
		else
			count = 0;
		offset = 0;

		unlock_page(page);
		page_cache_release(page);
	}

	return res;
}

/*
 * Copy user data from @ii into freshly grabbed page-cache pages,
 * attaching them to @req, until the request, max_write, or the iov is
 * exhausted.  Returns bytes gathered, or a negative errno if nothing
 * was copied.
 */
static ssize_t fuse_fill_write_pages(struct fuse_req *req,
			       struct address_space *mapping,
			       struct iov_iter *ii, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(mapping->host);
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	size_t count = 0;
	int err;

	req->in.argpages = 1;
	req->page_descs[0].offset = offset;

	do {
		size_t tmp;
		struct page *page;
		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
				     iov_iter_count(ii));

		bytes = min_t(size_t, bytes, fc->max_write - count);

 again:
		err = -EFAULT;
		/* Fault in the source now; the copy below runs atomically */
		if (iov_iter_fault_in_readable(ii, bytes))
			break;

		err = -ENOMEM;
		page = grab_cache_page_write_begin(mapping, index, 0);
		if (!page)
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		pagefault_disable();
		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
		pagefault_enable();
		flush_dcache_page(page);

		mark_page_accessed(page);

		/* Atomic copy made no progress: retry with a single segment */
		if (!tmp) {
			unlock_page(page);
			page_cache_release(page);
			bytes = min(bytes, iov_iter_single_seg_count(ii));
			goto again;
		}

		err = 0;
		req->pages[req->num_pages] = page;
		req->page_descs[req->num_pages].length = tmp;
		req->num_pages++;

		iov_iter_advance(ii, tmp);
		count += tmp;
		pos += tmp;
		offset += tmp;
		if (offset == PAGE_CACHE_SIZE)
			offset = 0;

		if (!fc->big_writes)
			break;
	} while (iov_iter_count(ii) && count < fc->max_write &&
		 req->num_pages < req->max_pages && offset == 0);

	return count > 0 ? count : err;
}

1040
1041
1042
1043
1044
1045
1046
1047
/*
 * Number of page-cache pages spanned by a write of @len bytes starting
 * at file offset @pos, clamped to the per-request page limit.
 */
static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
{
	pgoff_t first_index = pos >> PAGE_CACHE_SHIFT;
	pgoff_t last_index = (pos + len - 1) >> PAGE_CACHE_SHIFT;

	return min_t(unsigned, last_index - first_index + 1,
		     FUSE_MAX_PAGES_PER_REQ);
}

Nick Piggin's avatar
Nick Piggin committed
1048
1049
1050
1051
1052
1053
/*
 * Buffered write: repeatedly build a FUSE write request from the iov
 * iterator, send it, and advance, until the iterator is drained or an
 * error / short write occurs.
 *
 * Returns the number of bytes written if anything was written, else the
 * first error encountered (0 iov bytes yields 0).
 */
static ssize_t fuse_perform_write(struct file *file,
				  struct address_space *mapping,
				  struct iov_iter *ii, loff_t pos)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err = 0;
	ssize_t res = 0;

	if (is_bad_inode(inode))
		return -EIO;

	/*
	 * If this write extends the file, flag i_size as unstable for the
	 * duration of the loop; cleared unconditionally below.
	 */
	if (inode->i_size < pos + iov_iter_count(ii))
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);

	do {
		struct fuse_req *req;
		ssize_t count;
		/* size the request for the pages this chunk will touch */
		unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));

		req = fuse_get_req(fc, nr_pages);
		if (IS_ERR(req)) {
			err = PTR_ERR(req);
			break;
		}

		/* copy user data into locked page-cache pages */
		count = fuse_fill_write_pages(req, mapping, ii, pos);
		if (count <= 0) {
			err = count;
		} else {
			size_t num_written;

			num_written = fuse_send_write_pages(req, file, inode,
							    pos, count);
			err = req->out.h.error;
			if (!err) {
				res += num_written;
				pos += num_written;

				/* break out of the loop on short write */
				if (num_written != count)
					err = -EIO;
			}
		}
		/* release the request on success and failure alike */
		fuse_put_request(fc, req);
	} while (!err && iov_iter_count(ii));

	/* grow cached i_size to cover what was actually written */
	if (res > 0)
		fuse_write_update_size(inode, pos);

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	/* attributes (mtime etc.) changed server-side; drop cached copy */
	fuse_invalidate_attr(inode);

	return res > 0 ? res : err;
}

/*
 * ->aio_write for the cached-I/O path.  Performs the generic VFS write
 * checks under i_mutex, then either goes O_DIRECT (with a buffered
 * fallback for the untransferred remainder, mirroring
 * generic_file_aio_write) or writes through fuse_perform_write().
 *
 * Returns bytes written, or the error if nothing was written.
 */
static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	size_t count = 0;
	size_t ocount = 0;
	ssize_t written = 0;
	ssize_t written_buffered = 0;
	struct inode *inode = mapping->host;
	ssize_t err;
	struct iov_iter i;
	loff_t endbyte = 0;

	WARN_ON(iocb->ki_pos != pos);

	/* validate the iovec and get the total byte count */
	ocount = 0;
	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
	if (err)
		return err;

	count = ocount;
	mutex_lock(&inode->i_mutex);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	/* may shrink count (e.g. rlimit, O_APPEND repositioning) */
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	/* drop suid/sgid bits before modifying the file */
	err = file_remove_suid(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (file->f_flags & O_DIRECT) {
		written = generic_file_direct_write(iocb, iov, &nr_segs,
						    pos, &iocb->ki_pos,
						    count, ocount);
		/* done if it failed outright or transferred everything */
		if (written < 0 || written == count)
			goto out;

		pos += written;
		count -= written;

		/*
		 * Direct write was short: write the remainder through the
		 * page cache, then flush and invalidate that range so the
		 * file has no stale cached pages for direct I/O.
		 */
		iov_iter_init(&i, iov, nr_segs, count, written);
		written_buffered = fuse_perform_write(file, mapping, &i, pos);
		if (written_buffered < 0) {
			err = written_buffered;
			goto out;
		}
		endbyte = pos + written_buffered - 1;

		err = filemap_write_and_wait_range(file->f_mapping, pos,
						   endbyte);
		if (err)
			goto out;

		invalidate_mapping_pages(file->f_mapping,
					 pos >> PAGE_CACHE_SHIFT,
					 endbyte >> PAGE_CACHE_SHIFT);

		written += written_buffered;
		iocb->ki_pos = pos + written_buffered;
	} else {
		/* plain buffered write */
		iov_iter_init(&i, iov, nr_segs, count, 0);
		written = fuse_perform_write(file, mapping, &i, pos);
		if (written >= 0)
			iocb->ki_pos = pos + written;
	}
out:
	current->backing_dev_info = NULL;
	mutex_unlock(&inode->i_mutex);

	/* partial success wins over a late error */
	return written ? written : err;
}

1189
1190
static inline void fuse_page_descs_length_init(struct fuse_req *req,
		unsigned index, unsigned nr_pages)