/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/aio.h>

static const struct file_operations fuse_direct_io_file_operations;

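/*
 * Send a FUSE_OPEN or FUSE_OPENDIR request and wait for the reply.
 * The file's open flags are passed through to the server, minus the
 * bits handled by the kernel itself (and O_TRUNC when atomic_o_trunc
 * is disabled).
 */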
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	struct fuse_req *req;
	int err;

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outargp);
	req->out.args[0].value = outargp;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	return err;
}

struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
	struct fuse_file *ff;

	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
	if (unlikely(!ff))
		return NULL;

	ff->fc = fc;
	ff->reserved_req = fuse_request_alloc(0);
	if (unlikely(!ff->reserved_req)) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	atomic_set(&ff->count, 0);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	spin_lock(&fc->lock);
	ff->kh = ++fc->khctr;
	spin_unlock(&fc->lock);

	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	fuse_request_free(ff->reserved_req);
	kfree(ff);
}

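/* Take an additional reference on a fuse_file */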
struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	atomic_inc(&ff->count);
	return ff;
}

static void fuse_release_async(struct work_struct *work)
{
	struct fuse_req *req;
	struct fuse_conn *fc;
	struct path path;

	req = container_of(work, struct fuse_req, misc.release.work);
	path = req->misc.release.path;
	fc = get_fuse_conn(path.dentry->d_inode);

	fuse_put_request(fc, req);
	path_put(&path);
}

static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
	if (fc->destroy_req) {
		/*
		 * If this is a fuseblk mount, then it's possible that
		 * releasing the path will result in releasing the
		 * super block and sending the DESTROY request.  If
		 * the server is single threaded, this would hang.
		 * For this reason do the path_put() in a separate
		 * thread.
		 */
		atomic_inc(&req->count);
		INIT_WORK(&req->misc.release.work, fuse_release_async);
		schedule_work(&req->misc.release.work);
	} else {
		path_put(&req->misc.release.path);
	}
}

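/*
 * Drop a reference on a fuse_file.  When the last reference goes
 * away, send the prepared RELEASE request: synchronously if @sync is
 * set, otherwise in the background.
 */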
static void fuse_file_put(struct fuse_file *ff, bool sync)
{
	if (atomic_dec_and_test(&ff->count)) {
		struct fuse_req *req = ff->reserved_req;

		if (sync) {
			req->background = 0;
			fuse_request_send(ff->fc, req);
			path_put(&req->misc.release.path);
			fuse_put_request(ff->fc, req);
		} else {
			req->end = fuse_release_end;
			req->background = 1;
			fuse_request_send_background(ff->fc, req);
		}
		kfree(ff);
	}
}

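/*
 * Allocate a fuse_file, send the OPEN (or OPENDIR) request and store
 * the returned file handle.  On success the fuse_file is installed
 * in file->private_data with a reference held.
 */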
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_open_out outarg;
	struct fuse_file *ff;
	int err;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

	ff = fuse_file_alloc(fc);
	if (!ff)
		return -ENOMEM;

	err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
	if (err) {
		fuse_file_free(ff);
		return err;
	}

	if (isdir)
		outarg.open_flags &= ~FOPEN_DIRECT_IO;

	ff->fh = outarg.fh;
	ff->nodeid = nodeid;
	ff->open_flags = outarg.open_flags;
	file->private_data = fuse_file_get(ff);

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);

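/*
 * Apply the open flags returned by the server: switch to direct I/O
 * file operations, drop the page cache and/or mark the file
 * nonseekable as requested, and handle atomic O_TRUNC.
 */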
void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_DIRECT_IO)
		file->f_op = &fuse_direct_io_file_operations;
	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, 0);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
	}
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
	if (err)
		return err;

	fuse_finish_open(inode, file);

	return 0;
}

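/*
 * Unhash the file from the write and poll lists and fill in the
 * reserved request with a RELEASE (or RELEASEDIR) request.  The
 * request is not sent here.
 */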
static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
	struct fuse_conn *fc = ff->fc;
	struct fuse_req *req = ff->reserved_req;
	struct fuse_release_in *inarg = &req->misc.release.in;

	spin_lock(&fc->lock);
	list_del(&ff->write_entry);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_all(&ff->poll_wait);

	inarg->fh = ff->fh;
	inarg->flags = flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_release_in);
	req->in.args[0].value = inarg;
}

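/*
 * Common code for releasing a file.  The RELEASE request is prepared
 * here but only sent once the last reference to the fuse_file is
 * dropped in fuse_file_put().
 */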
void fuse_release_common(struct file *file, int opcode)
{
	struct fuse_file *ff;
	struct fuse_req *req;

	ff = file->private_data;
	if (unlikely(!ff))
		return;

	req = ff->reserved_req;
	fuse_prepare_release(ff, file->f_flags, opcode);

	if (ff->flock) {
		struct fuse_release_in *inarg = &req->misc.release.in;
		inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
						       (fl_owner_t) file);
	}
	/* Hold vfsmount and dentry until release is finished */
	path_get(&file->f_path);
	req->misc.release.path = file->f_path;

	/*
	 * Normally this will send the RELEASE request; however, if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount:
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fc->destroy_req != NULL);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, FUSE_RELEASE);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_file *ff, int flags)
{
	WARN_ON(atomic_read(&ff->count) > 1);
	fuse_prepare_release(ff, flags, FUSE_RELEASE);
	ff->reserved_req->force = 1;
	ff->reserved_req->background = 0;
	fuse_request_send(ff->fc, ff->reserved_req);
	fuse_put_request(ff->fc, ff->reserved_req);
	kfree(ff);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

/*
 * Check if page is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	bool found = false;

	spin_lock(&fc->lock);
	list_for_each_entry(req, &fi->writepages, writepages_entry) {
		pgoff_t curr_index;

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index == index) {
			found = true;
			break;
		}
	}
	spin_unlock(&fc->lock);

	return found;
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
	return 0;
}

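/*
 * flush method: send a FUSE_FLUSH request carrying the lock owner id.
 * An ENOSYS reply is remembered so that subsequent flushes are
 * skipped.
 */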
static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_flush_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_flush)
		return 0;

	req = fuse_get_req_nofail_nopages(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fc, id);
	req->in.h.opcode = FUSE_FLUSH;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->force = 1;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		fc->no_flush = 1;
		err = 0;
	}
	return err;
}

/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

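/*
 * Common implementation of the file and directory fsync methods:
 * flush dirty pages, wait for outstanding writes, then send FSYNC or
 * FSYNCDIR.  ENOSYS replies are remembered per opcode.
 */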
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int isdir)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_fsync_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (err)
		return err;

	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
		return 0;

	mutex_lock(&inode->i_mutex);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = write_inode_now(inode, 0);
	if (err)
		goto out;

	fuse_sync_writes(inode);

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? 1 : 0;
	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		if (isdir)
			fc->no_fsyncdir = 1;
		else
			fc->no_fsync = 1;
		err = 0;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return err;
}

static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	return fuse_fsync_common(file, start, end, datasync, 0);
}

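/*
 * Fill in a read request for @count bytes at offset @pos.  The reply
 * size is variable (out.argvar), so a shorter-than-requested answer
 * is accepted.
 */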
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
		    size_t count, int opcode)
{
	struct fuse_read_in *inarg = &req->misc.read.in;
	struct fuse_file *ff = file->private_data;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	inarg->flags = file->f_flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_read_in);
	req->in.args[0].value = inarg;
	req->out.argvar = 1;
	req->out.numargs = 1;
	req->out.args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_req *req, int write)
{
	unsigned i;

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (write)
			set_page_dirty_lock(page);
		put_page(page);
	}
}

/**
 * In case of a short read, the caller sets 'pos' to the position of the
 * actual end of the fuse request within the IO request. Otherwise, if
 * bytes_requested == bytes_transferred or rw == WRITE, the caller sets
 * 'pos' to -1.
 *
 * An example:
 * The user requested a DIO read of 64K. It was split into two 32K fuse
 * requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ? : err;
	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
		io->bytes = pos;

	left = --io->reqs;
	spin_unlock(&io->lock);

	if (!left) {
		long res;

		if (io->err)
			res = io->err;
		else if (io->bytes >= 0 && io->write)
			res = -EIO;
		else {
			res = io->bytes < 0 ? io->size : io->bytes;

			if (!is_sync_kiocb(io->iocb)) {
				struct path *path = &io->iocb->ki_filp->f_path;
				struct inode *inode = path->dentry->d_inode;
				struct fuse_conn *fc = get_fuse_conn(inode);
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fc->lock);
				fi->attr_version = ++fc->attr_version;
				spin_unlock(&fc->lock);
			}
		}

		aio_complete(io->iocb, res, 0);
		kfree(io);
	}
}

static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_io_priv *io = req->io;
	ssize_t pos = -1;

	fuse_release_user_pages(req, !io->write);

	if (io->write) {
		if (req->misc.write.in.size != req->misc.write.out.size)
			pos = req->misc.write.in.offset - io->offset +
				req->misc.write.out.size;
	} else {
		if (req->misc.read.in.size != req->out.args[0].size)
			pos = req->misc.read.in.offset - io->offset +
				req->out.args[0].size;
	}

	fuse_aio_complete(io, req->out.h.error, pos);
}

static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
		size_t num_bytes, struct fuse_io_priv *io)
{
	spin_lock(&io->lock);
	io->size += num_bytes;
	io->reqs++;
	spin_unlock(&io->lock);

	req->io = io;
	req->end = fuse_aio_complete_req;

	__fuse_get_request(req);
	fuse_request_send_background(fc, req);

	return num_bytes;
}

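/*
 * Send a READ request.  For async IO the request is queued in the
 * background and @count is returned optimistically; otherwise the
 * number of bytes actually read is returned.
 */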
static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
			     loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;

	fuse_read_fill(req, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		struct fuse_read_in *inarg = &req->misc.read.in;

		inarg->read_flags |= FUSE_READ_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->out.args[0].size;
}

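/*
 * A short read indicates EOF: shrink the cached i_size, but only if
 * the attributes haven't changed (attr_version) in the meantime.
 */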
static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size) {
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, size);
	}
	spin_unlock(&fc->lock);
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct inode *inode = page->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	size_t num_read;
	loff_t pos = page_offset(page);
	size_t count = PAGE_CACHE_SIZE;
	u64 attr_ver;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc, 1);
	err = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	attr_ver = fuse_get_attr_version(fc);

	req->out.page_zeroing = 1;
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = count;
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (!err) {
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (num_read < count)
			fuse_read_update_size(inode, pos + num_read, attr_ver);

		SetPageUptodate(page);
	}

	fuse_invalidate_attr(inode); /* atime changed */
 out:
	unlock_page(page);
	return err;
}

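/*
 * Completion handler for readpages requests: update i_size after a
 * short read, then mark the pages up to date (or failed) and unlock
 * them.
 */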
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	size_t count = req->misc.read.in.size;
	size_t num_read = req->out.args[0].size;
	struct address_space *mapping = NULL;

	for (i = 0; mapping == NULL && i < req->num_pages; i++)
		mapping = req->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF. If file size is larger, truncate it
		 */
		if (!req->out.h.error && num_read < count) {
			loff_t pos;

			pos = page_offset(req->pages[0]) + num_read;
			fuse_read_update_size(inode, pos,
					      req->misc.read.attr_ver);
		}
		fuse_invalidate_attr(inode); /* atime changed */
	}

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (!req->out.h.error)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		page_cache_release(page);
	}
	if (req->ff)
		fuse_file_put(req->ff, false);
}

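/*
 * Send one readahead request.  With async_read the request completes
 * in the background via fuse_readpages_end(); otherwise it is sent
 * synchronously and completed here.
 */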
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	loff_t pos = page_offset(req->pages[0]);
	size_t count = req->num_pages << PAGE_CACHE_SHIFT;

	req->out.argpages = 1;
	req->out.page_zeroing = 1;
	req->out.page_replace = 1;
	fuse_read_fill(req, file, pos, count, FUSE_READ);
	req->misc.read.attr_ver = fuse_get_attr_version(fc);
	if (fc->async_read) {
		req->ff = fuse_file_get(ff);
		req->end = fuse_readpages_end;
		fuse_request_send_background(fc, req);
	} else {
		fuse_request_send(fc, req);
		fuse_readpages_end(fc, req);
		fuse_put_request(fc, req);
	}
}

struct fuse_fill_data {
	struct fuse_req *req;
	struct file *file;
	struct inode *inode;
	unsigned nr_pages;
};

static int fuse_readpages_fill(void *_data, struct page *page)
{
	struct fuse_fill_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	fuse_wait_on_page_writeback(inode, page->index);

	if (req->num_pages &&
	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
		int nr_alloc = min_t(unsigned, data->nr_pages,
				     FUSE_MAX_PAGES_PER_REQ);
		fuse_send_readpages(req, data->file);
		if (fc->async_read)
			req = fuse_get_req_for_background(fc, nr_alloc);
		else
			req = fuse_get_req(fc, nr_alloc);

		data->req = req;
		if (IS_ERR(req)) {
			unlock_page(page);
			return PTR_ERR(req);
		}
	}

	if (WARN_ON(req->num_pages >= req->max_pages)) {
		fuse_put_request(fc, req);
		return -EIO;
	}

	page_cache_get(page);
	req->pages[req->num_pages] = page;
	req->page_descs[req->num_pages].length = PAGE_SIZE;
	req->num_pages++;
	data->nr_pages--;
	return 0;
}

static int fuse_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_data data;
	int err;
	int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.file = file;
	data.inode = inode;
	if (fc->async_read)
		data.req = fuse_get_req_for_background(fc, nr_alloc);
	else
		data.req = fuse_get_req(fc, nr_alloc);
	data.nr_pages = nr_pages;
	err = PTR_ERR(data.req);
	if (IS_ERR(data.req))
		goto out;

	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
	if (!err) {
		if (data.req->num_pages)
			fuse_send_readpages(data.req, file);
		else
			fuse_put_request(fc, data.req);
	}
out:
	return err;
}

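/*
 * Cached read: refresh the attributes first when needed so that
 * i_size is up to date, then use the generic page-cache read path.
 */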
static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/*
	 * In auto invalidate mode, always update attributes on read.
	 * Otherwise, only update if we attempt to read past EOF (to ensure
	 * i_size is up to date).
	 */
	if (fc->auto_inval_data ||
	    (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
		int err;
		err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
		if (err)
			return err;
	}

	return generic_file_aio_read(iocb, iov, nr_segs, pos);
}

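/*
 * Fill in a WRITE request.  Servers speaking protocol minor < 9
 * expect the shorter compat layout of struct fuse_write_in.
 */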
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
			    loff_t pos, size_t count)
{
	struct fuse_write_in *inarg = &req->misc.write.in;
	struct fuse_write_out *outarg = &req->misc.write.out;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	req->in.h.opcode = FUSE_WRITE;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 2;
	if (ff->fc->minor < 9)
		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		req->in.args[0].size = sizeof(struct fuse_write_in);
	req->in.args[0].value = inarg;
	req->in.args[1].size = count;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(struct fuse_write_out);
	req->out.args[0].value = outarg;
}

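/*
 * Send a WRITE request.  As with reads, async requests return @count
 * optimistically; synchronous ones return the size acknowledged by
 * the server.
 */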
static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
			      loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_write_in *inarg = &req->misc.write.in;

	fuse_write_fill(req, ff, pos, count);
	inarg->flags = file->f_flags;
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->misc.write.out.size;
}

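/* Extend the cached i_size after a successful write past EOF */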
void fuse_write_update_size(struct inode *inode, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	fi->attr_version = ++fc->attr_version;
	if (pos > inode->i_size)
		i_size_write(inode, pos);
	spin_unlock(&fc->lock);
}

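/*
 * Send a WRITE request for a set of page-cache pages, mark the fully
 * written ones up to date, then unlock and release them.
 */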
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
				    struct inode *inode, loff_t pos,
				    size_t count)
{
	size_t res;
	unsigned offset;
	unsigned i;
	struct fuse_io_priv io = { .async = 0, .file = file };

	for (i = 0; i < req->num_pages; i++)
		fuse_wait_on_page_writeback(inode, req->pages[i]->index);

	res = fuse_send_write(req, &io, pos, count, NULL);

	offset = req->page_descs[0].offset;
	count = res;
	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];

		if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
			SetPageUptodate(page);

		if (count > PAGE_CACHE_SIZE - offset)
			count -= PAGE_CACHE_SIZE - offset;
		else
			count = 0;
		offset = 0;

		unlock_page(page);
		page_cache_release(page);
	}

	return res;
}

static ssize_t fuse_fill_write_pages(struct fuse_req *req,
			       struct address_space *mapping,
			       struct iov_iter *ii, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(mapping->host);
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	size_t count = 0;
	int err;

	req->in.argpages = 1;
	req->page_descs[0].offset = offset;

	do {
		size_t tmp;
		struct page *page;
		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
				     iov_iter_count(ii));

		bytes = min_t(size_t, bytes, fc->max_write - count);

 again:
		err = -EFAULT;
		if (iov_iter_fault_in_readable(ii, bytes))
			break;

		err = -ENOMEM;
		page = grab_cache_page_write_begin(mapping, index, 0);
		if (!page)
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		pagefault_disable();
		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
		pagefault_enable();
		flush_dcache_page(page);

		mark_page_accessed(page);

		if (!tmp) {
			unlock_page(page);
			page_cache_release(page);
			bytes = min(bytes, iov_iter_single_seg_count(ii));
			goto again;
		}

		err = 0;
		req->pages[req->num_pages] = page;
		req->page_descs[req->num_pages].length = tmp;
		req->num_pages++;

		iov_iter_advance(ii, tmp);
		count += tmp;
		pos += tmp;
		offset += tmp;
		if (offset == PAGE_CACHE_SIZE)
			offset = 0;

		if (!fc->big_writes)
			break;
	} while (iov_iter_count(ii) && count < fc->max_write &&
		 req->num_pages < req->max_pages && offset == 0);

	return count > 0 ? count : err;
}

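/* Number of page-cache pages spanned by a write of @len bytes at @pos */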
static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
{
	return min_t(unsigned,
		     ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
		     (pos >> PAGE_CACHE_SHIFT) + 1,
		     FUSE_MAX_PAGES_PER_REQ);
}

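/*
 * Buffered write: copy user data into page-cache pages and push them
 * to the server in requests of up to fc->max_write bytes.  A short
 * write from the server ends the loop with -EIO.
 */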
static ssize_t fuse_perform_write(struct file *file,
				  struct address_space *mapping,
				  struct iov_iter *ii, loff_t pos)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err = 0;
	ssize_t res = 0;

	if (is_bad_inode(inode))
		return -EIO;

	do {
		struct fuse_req *req;
		ssize_t count;
		unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));

		req = fuse_get_req(fc, nr_pages);
		if (IS_ERR(req)) {
			err = PTR_ERR(req);
			break;
		}

		count = fuse_fill_write_pages(req, mapping, ii, pos);
		if (count <= 0) {
			err = count;
		} else {
			size_t num_written;

			num_written = fuse_send_write_pages(req, file, inode,
							    pos, count);
			err = req->out.h.error;
			if (!err) {
				res += num_written;
				pos += num_written;

				/* break out of the loop on short write */
				if (num_written != count)
					err = -EIO;
			}
		}
		fuse_put_request(fc, req);
	} while (!err && iov_iter_count(ii));

	if (res > 0)
		fuse_write_update_size(inode, pos);

	fuse_invalidate_attr(inode);

	return res > 0 ? res : err;
}

static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	size_t count = 0;
	size_t ocount = 0;
	ssize_t written = 0;
	ssize_t written_buffered = 0;
	struct inode *inode = mapping->host;
	ssize_t err;
	struct iov_iter i;
	loff_t endbyte = 0;

	WARN_ON(iocb->ki_pos != pos);

	ocount = 0;
	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
	if (err)
		return err;

	count = ocount;
	mutex_lock(&inode->i_mutex);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	err = file_remove_suid(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (file->f_flags & O_DIRECT) {
		written = generic_file_direct_write(iocb, iov, &nr_segs,
						    pos, &iocb->ki_pos,
						    count, ocount);
		if (written < 0 || written == count)
			goto out;

		pos += written;
		count -= written;

		iov_iter_init(&i, iov, nr_segs, count, written);
		written_buffered = fuse_perform_write(file, mapping, &i, pos);
		if (written_buffered < 0) {
			err = written_buffered;
			goto out;
		}
		endbyte = pos + written_buffered - 1;

		err = filemap_write_and_wait_range(file->f_mapping, pos,
						   endbyte);
		if (err)
			goto out;

		invalidate_mapping_pages(file->f_mapping,
					 pos >> PAGE_CACHE_SHIFT,
					 endbyte >> PAGE_CACHE_SHIFT);

		written += written_buffered;
		iocb->ki_pos = pos + written_buffered;
	} else {
		iov_iter_init(&i, iov, nr_segs, count, 0);
		written = fuse_perform_write(file, mapping, &i, pos);
		if (written >= 0)
			iocb->ki_pos = pos + written;
	}
out:
	current->backing_dev_info = NULL;
	mutex_unlock(&inode->i_mutex);

	return written ? written : err;
}

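/* Initialize page descriptors to cover each page from its offset onward */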
static inline void fuse_page_descs_length_init(struct fuse_req *req,
		unsigned index, unsigned nr_pages)
{
	int i;

	for (i = index; i < index + nr_pages; i++)
		req->page_descs[i].length = PAGE_SIZE -
			req->page_descs[i].offset;
}

static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
{
	return (unsigned long)ii->iov->iov_base + ii->iov_offset;
}

static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
					size_t max_size)
{
	return min(iov_iter_single_seg_count(ii), max_size);
}

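/*
 * Map the user buffer described by @ii into the request.  Kernel
 * buffers (KERNEL_DS) are special-cased: the address is passed
 * straight through without pinning any pages.
 */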
static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
			       size_t *nbytesp, int write)
{
	size_t nbytes = 0;  /* # bytes already packed in req */

	/* Special case for kernel I/O: can copy directly into the buffer */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		unsigned long user_addr = fuse_get_user_addr(ii);
		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);

		if (write)
			req->in.args[1].value = (void *) user_addr;
		else
			req->out.args[0].value = (void *) user_addr;

		iov_iter_advance(ii, frag_size);
		*nbytesp = frag_size;
		return 0;
	}