fs.h 96.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

/*
 * This file has definitions for some important file table
 * structures etc.
 */

#include <linux/limits.h>
#include <linux/ioctl.h>
11
#include <linux/blk_types.h>
12
#include <linux/types.h>
13
#include <linux/percpu-rwsem.h>
Linus Torvalds's avatar
Linus Torvalds committed
14
15
16
17
18
19
20
21
22
23
24
25
26

/*
 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
 * the file limit at runtime and only root can increase the per-process
 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
 * upper limit on files-per-process.
 *
 * Some programs (notably those using select()) may have to be 
 * recompiled to take full advantage of the new limits..  
 */

/* Fixed constants first: */
#undef NR_OPEN
27
28
#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
Linus Torvalds's avatar
Linus Torvalds committed
29
30
31
32

#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)

33
34
35
#define SEEK_SET	0	/* seek relative to beginning of file */
#define SEEK_CUR	1	/* seek relative to current file position */
#define SEEK_END	2	/* seek relative to end of file */
36
37
38
#define SEEK_DATA	3	/* seek to the next data */
#define SEEK_HOLE	4	/* seek to the next hole */
#define SEEK_MAX	SEEK_HOLE
39

Lukas Czerner's avatar
Lukas Czerner committed
40
struct fstrim_range {
41
42
43
	__u64 start;
	__u64 len;
	__u64 minlen;
Lukas Czerner's avatar
Lukas Czerner committed
44
45
};

Linus Torvalds's avatar
Linus Torvalds committed
46
47
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
48
49
50
	unsigned long nr_files;		/* read only */
	unsigned long nr_free_files;	/* read only */
	unsigned long max_files;		/* tunable */
Linus Torvalds's avatar
Linus Torvalds committed
51
52
53
54
55
};

struct inodes_stat_t {
	int nr_inodes;
	int nr_unused;
56
	int dummy[5];		/* padding for sysctl ABI compatibility */
Linus Torvalds's avatar
Linus Torvalds committed
57
58
59
60
61
};


#define NR_FILE  8192	/* this can well be larger on a larger system */

62
63
64
65
66
67
68
69
70
#define MAY_EXEC		0x00000001
#define MAY_WRITE		0x00000002
#define MAY_READ		0x00000004
#define MAY_APPEND		0x00000008
#define MAY_ACCESS		0x00000010
#define MAY_OPEN		0x00000020
#define MAY_CHDIR		0x00000040
/* called from RCU mode, don't block */
#define MAY_NOT_BLOCK		0x00000080
Linus Torvalds's avatar
Linus Torvalds committed
71

72
73
74
75
76
/*
 * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
 */

77
/* file is open for reading */
78
#define FMODE_READ		((__force fmode_t)0x1)
79
/* file is open for writing */
80
#define FMODE_WRITE		((__force fmode_t)0x2)
81
/* file is seekable */
82
#define FMODE_LSEEK		((__force fmode_t)0x4)
83
/* file can be accessed using pread */
84
#define FMODE_PREAD		((__force fmode_t)0x8)
85
/* file can be accessed using pwrite */
86
#define FMODE_PWRITE		((__force fmode_t)0x10)
87
/* File is opened for execution with sys_execve / sys_uselib */
88
#define FMODE_EXEC		((__force fmode_t)0x20)
89
/* File is opened with O_NDELAY (only set for block devices) */
90
#define FMODE_NDELAY		((__force fmode_t)0x40)
91
/* File is opened with O_EXCL (only set for block devices) */
92
#define FMODE_EXCL		((__force fmode_t)0x80)
93
94
/* File is opened using open(.., 3, ..) and is writeable only for ioctls
   (special hack for floppy.c) */
95
#define FMODE_WRITE_IOCTL	((__force fmode_t)0x100)
96
97
98
99
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH         ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH         ((__force fmode_t)0x400)
100

101
102
103
104
105
106
/*
 * Don't update ctime and mtime.
 *
 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
 */
107
#define FMODE_NOCMTIME		((__force fmode_t)0x800)
108

109
/* Expect random access pattern */
110
#define FMODE_RANDOM		((__force fmode_t)0x1000)
111

112
113
114
/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)

115
116
117
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH		((__force fmode_t)0x4000)

Eric Paris's avatar
Eric Paris committed
118
/* File was opened by fanotify and shouldn't generate fanotify events */
119
#define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
Eric Paris's avatar
Eric Paris committed
120

121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
 * The below are the various read and write types that we support. Some of
 * them include behavioral modifiers that send information down to the
 * block layer and IO scheduler. Terminology:
 *
 *	The block layer uses device plugging to defer IO a little bit, in
 *	the hope that we will see more IO very shortly. This increases
 *	coalescing of adjacent IO and thus reduces the number of IOs we
 *	have to send to the device. It also allows for better queuing,
 *	if the IO isn't mergeable. If the caller is going to be waiting
 *	for the IO, then he must ensure that the device is unplugged so
 *	that the IO is dispatched to the driver.
 *
 *	All IO is handled async in Linux. This is fine for background
 *	writes, but for reads or writes that someone waits for completion
 *	on, we want to notify the block layer and IO scheduler so that they
 *	know about it. That allows them to make better scheduling
 *	decisions. So when the below references 'sync' and 'async', it
 *	is referencing this priority hint.
 *
 * With that in mind, the available types are:
 *
 * READ			A normal read operation. Device will be plugged.
 * READ_SYNC		A synchronous read. Device is not plugged, caller can
 *			immediately wait on this read without caring about
 *			unplugging.
 * READA		Used for read-ahead operations. Lower priority, and the
148
 *			block layer could (in theory) choose to ignore this
149
150
 *			request if it runs into resource problems.
 * WRITE		A normal async write. Device will be plugged.
Jens Axboe's avatar
Jens Axboe committed
151
 * WRITE_SYNC		Synchronous write. Identical to WRITE, but passes down
152
 *			the hint that someone will be waiting on this IO
Jens Axboe's avatar
Jens Axboe committed
153
154
 *			shortly. The write equivalent of READ_SYNC.
 * WRITE_ODIRECT	Special case write for O_DIRECT only.
155
156
157
158
159
160
 * WRITE_FLUSH		Like WRITE_SYNC but with preceding cache flush.
 * WRITE_FUA		Like WRITE_SYNC but data is guaranteed to be on
 *			non-volatile media on completion.
 * WRITE_FLUSH_FUA	Combination of WRITE_FLUSH and FUA. The IO is preceded
 *			by a cache flush and data is guaranteed to be on
 *			non-volatile media on completion.
161
162
 *
 */
163
164
#define RW_MASK			REQ_WRITE
#define RWA_MASK		REQ_RAHEAD
165
166

#define READ			0
167
168
#define WRITE			RW_MASK
#define READA			RWA_MASK
169
170
#define KERNEL_READ		(READ|REQ_KERNEL)
#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
171

Jens Axboe's avatar
Jens Axboe committed
172
173
174
175
176
177
#define READ_SYNC		(READ | REQ_SYNC)
#define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
#define WRITE_ODIRECT		(WRITE | REQ_SYNC)
#define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
#define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
#define WRITE_FLUSH_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
Linus Torvalds's avatar
Linus Torvalds committed
178

179
180
181
182
183
184
185
186
187

/*
 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
 * that indicates that they should check the contents of the iovec are
 * valid, but not check the memory that the iovec elements
 * point to.
 */
#define CHECK_IOVEC_ONLY -1

Linus Torvalds's avatar
Linus Torvalds committed
188
189
190
191
192
193
194
#define SEL_IN		1
#define SEL_OUT		2
#define SEL_EX		4

/* public flags for file_system_type */
#define FS_REQUIRES_DEV 1 
#define FS_BINARY_MOUNTDATA 2
195
#define FS_HAS_SUBTYPE 4
Linus Torvalds's avatar
Linus Torvalds committed
196
#define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
197
198
199
200
#define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
					 * during rename() internally.
					 */

Linus Torvalds's avatar
Linus Torvalds committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*
 * These are the fs-independent mount-flags: up to 32 flags are supported
 */
#define MS_RDONLY	 1	/* Mount read-only */
#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
#define MS_NODEV	 4	/* Disallow access to device special files */
#define MS_NOEXEC	 8	/* Disallow program execution */
#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
#define MS_NOATIME	1024	/* Do not update access times. */
#define MS_NODIRATIME	2048	/* Do not update directory access times */
#define MS_BIND		4096
#define MS_MOVE		8192
#define MS_REC		16384
217
218
219
#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
				   MS_VERBOSE is deprecated. */
#define MS_SILENT	32768
220
#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
Ram Pai's avatar
Ram Pai committed
221
#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
222
#define MS_PRIVATE	(1<<18)	/* change to private */
Ram Pai's avatar
Ram Pai committed
223
#define MS_SLAVE	(1<<19)	/* change to slave */
Ram Pai's avatar
Ram Pai committed
224
#define MS_SHARED	(1<<20)	/* change to shared */
Valerie Henson's avatar
Valerie Henson committed
225
#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
226
#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
227
#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
228
#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
Al Viro's avatar
Al Viro committed
229
#define MS_NOSEC	(1<<28)
Al Viro's avatar
Al Viro committed
230
#define MS_BORN		(1<<29)
Linus Torvalds's avatar
Linus Torvalds committed
231
232
233
234
235
236
#define MS_ACTIVE	(1<<30)
#define MS_NOUSER	(1<<31)

/*
 * Superblock flags that can be altered by MS_REMOUNT
 */
237
#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
Linus Torvalds's avatar
Linus Torvalds committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256

/*
 * Old magic mount flag and mask
 */
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000

/* Inode flags - they have nothing to do with superblock flags now */

#define S_SYNC		1	/* Writes are synced at once */
#define S_NOATIME	2	/* Do not update access times */
#define S_APPEND	4	/* Append-only file */
#define S_IMMUTABLE	8	/* Immutable file */
#define S_DEAD		16	/* removed, but still open directory */
#define S_NOQUOTA	32	/* Inode is not counted to quota */
#define S_DIRSYNC	64	/* Directory modifications are synchronous */
#define S_NOCMTIME	128	/* Do not update file c/mtime */
#define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	512	/* Inode is fs-internal */
257
#define S_IMA		1024	/* Inode has an associated IMA struct */
258
#define S_AUTOMOUNT	2048	/* Automount/referral quasi-directory */
259
#define S_NOSEC		4096	/* no suid or xattr security attributes */
Linus Torvalds's avatar
Linus Torvalds committed
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

/*
 * Note that nosuid etc flags are inode-specific: setting some file-system
 * flags just means all the inodes inherit those flags by default. It might be
 * possible to override it selectively if you really wanted to with some
 * ioctl() that is not currently implemented.
 *
 * Exception: MS_RDONLY is always applied to the entire file system.
 *
 * Unfortunately, it is possible to change a filesystem's flags with it mounted
 * with files in use.  This means that all of the inodes will not have their
 * i_flags updated.  Hence, i_flags no longer inherit the superblock mount
 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
 */
#define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))

#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
#define IS_SYNC(inode)		(__IS_FLG(inode, MS_SYNCHRONOUS) || \
					((inode)->i_flags & S_SYNC))
#define IS_DIRSYNC(inode)	(__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
					((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, MS_MANDLOCK)
282
#define IS_NOATIME(inode)   __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
283
#define IS_I_VERSION(inode)   __IS_FLG(inode, MS_I_VERSION)
Linus Torvalds's avatar
Linus Torvalds committed
284
285
286
287
288
289
290
291
292
293

#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
#define IS_POSIXACL(inode)	__IS_FLG(inode, MS_POSIXACL)

#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
294
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)
295
#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
296
#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
Linus Torvalds's avatar
Linus Torvalds committed
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

/* the read-only stuff doesn't really belong here, but any other place is
   probably as bad and I don't want to create yet another include file. */

#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read_write) */
#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
#if 0
#define BLKPG      _IO(0x12,105)/* See blkpg.h */

/* Some people are morons.  Do not use sizeof! */

#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
/* This was here just to show that the number is taken -
   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
#endif
/* A jump here: 108-111 have been used for various private purposes. */
#define BLKBSZGET  _IOR(0x12,112,size_t)
#define BLKBSZSET  _IOW(0x12,113,size_t)
#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
327
328
329
330
#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
#define BLKTRACESTART _IO(0x12,116)
#define BLKTRACESTOP _IO(0x12,117)
#define BLKTRACETEARDOWN _IO(0x12,118)
331
#define BLKDISCARD _IO(0x12,119)
Martin K. Petersen's avatar
Martin K. Petersen committed
332
333
334
335
#define BLKIOMIN _IO(0x12,120)
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)
336
#define BLKDISCARDZEROES _IO(0x12,124)
Adrian Hunter's avatar
Adrian Hunter committed
337
#define BLKSECDISCARD _IO(0x12,125)
338
#define BLKROTATIONAL _IO(0x12,126)
339
#define BLKZEROOUT _IO(0x12,127)
Linus Torvalds's avatar
Linus Torvalds committed
340
341
342
343

#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
#define FIBMAP	   _IO(0x00,1)	/* bmap access */
#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
344
345
#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
Lukas Czerner's avatar
Lukas Czerner committed
346
#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
Linus Torvalds's avatar
Linus Torvalds committed
347

348
349
350
351
#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
Mark Fasheh's avatar
Mark Fasheh committed
352
#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
#define FS_IOC32_SETVERSION		_IOW('v', 2, int)

/*
 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
 */
#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
#define	FS_UNRM_FL			0x00000002 /* Undelete */
#define	FS_COMPR_FL			0x00000004 /* Compress file */
#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
#define FS_NODUMP_FL			0x00000040 /* do not dump file */
#define FS_NOATIME_FL			0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define FS_DIRTY_FL			0x00000100
#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
#define FS_ECOMPR_FL			0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
#define FS_BTREE_FL			0x00001000 /* btree format dir */
#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
382
383
#define FS_EXTENT_FL			0x00080000 /* Extents */
#define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
liubo's avatar
liubo committed
384
#define FS_NOCOW_FL			0x00800000 /* Do not cow file */
385
386
387
388
389
390
#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */

#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */


391
392
393
394
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#define SYNC_FILE_RANGE_WRITE		2
#define SYNC_FILE_RANGE_WAIT_AFTER	4

Linus Torvalds's avatar
Linus Torvalds committed
395
396
397
398
399
400
#ifdef __KERNEL__

#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
#include <linux/dcache.h>
401
#include <linux/path.h>
Linus Torvalds's avatar
Linus Torvalds committed
402
403
404
405
406
407
#include <linux/stat.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
#include <linux/init.h>
408
#include <linux/pid.h>
409
#include <linux/bug.h>
410
#include <linux/mutex.h>
411
#include <linux/capability.h>
412
#include <linux/semaphore.h>
Mark Fasheh's avatar
Mark Fasheh committed
413
#include <linux/fiemap.h>
414
#include <linux/rculist_bl.h>
415
#include <linux/atomic.h>
416
#include <linux/shrinker.h>
417
#include <linux/migrate_mode.h>
418
#include <linux/uidgid.h>
419
#include <linux/lockdep.h>
Linus Torvalds's avatar
Linus Torvalds committed
420
421
422

#include <asm/byteorder.h>

423
struct export_operations;
424
struct hd_geometry;
Linus Torvalds's avatar
Linus Torvalds committed
425
426
struct iovec;
struct nameidata;
427
struct kiocb;
Alexey Dobriyan's avatar
Alexey Dobriyan committed
428
struct kobject;
Linus Torvalds's avatar
Linus Torvalds committed
429
430
431
432
433
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
434
struct cred;
435
struct swap_info_struct;
Linus Torvalds's avatar
Linus Torvalds committed
436

437
extern void __init inode_init(void);
Linus Torvalds's avatar
Linus Torvalds committed
438
439
440
extern void __init inode_init_early(void);
extern void __init files_init(unsigned long);

441
extern struct files_stat_struct files_stat;
442
extern unsigned long get_max_files(void);
443
444
445
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
Kees Cook's avatar
Kees Cook committed
446
447
extern int sysctl_protected_symlinks;
extern int sysctl_protected_hardlinks;
448

Linus Torvalds's avatar
Linus Torvalds committed
449
450
451
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);
452
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
453
454
			ssize_t bytes, void *private, int ret,
			bool is_async);
Linus Torvalds's avatar
Linus Torvalds committed
455
456
457
458
459

/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 */
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
#define ATTR_ATTR_FLAG	(1 << 10)
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
Linus Torvalds's avatar
Linus Torvalds committed
477
478
479
480
481
482
483
484
485
486
487
488
489

/*
 * This is the Inode Attributes structure, used for notify_change().  It
 * uses the above definitions as flags, to know which values have changed.
 * Also, in this manner, a Filesystem can look at only the values it cares
 * about.  Basically, these are the attributes that the VFS layer can
 * request to change from the FS layer.
 *
 * Derek Atkins <warlord@MIT.EDU> 94-10-20
 */
struct iattr {
	unsigned int	ia_valid;
	umode_t		ia_mode;
490
491
	kuid_t		ia_uid;
	kgid_t		ia_gid;
Linus Torvalds's avatar
Linus Torvalds committed
492
493
494
495
	loff_t		ia_size;
	struct timespec	ia_atime;
	struct timespec	ia_mtime;
	struct timespec	ia_ctime;
496
497

	/*
Lucas De Marchi's avatar
Lucas De Marchi committed
498
	 * Not an attribute, but an auxiliary info for filesystems wanting to
499
500
501
502
	 * implement an ftruncate() like method.  NOTE: filesystem should
	 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
	 */
	struct file	*ia_file;
Linus Torvalds's avatar
Linus Torvalds committed
503
504
505
506
507
508
509
};

/*
 * Includes for diskquotas.
 */
#include <linux/quota.h>

510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
/** 
 * enum positive_aop_returns - aop return codes with specific semantics
 *
 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
 * 			    completed, that the page is still locked, and
 * 			    should be considered active.  The VM uses this hint
 * 			    to return the page to the active list -- it won't
 * 			    be a candidate for writeback again in the near
 * 			    future.  Other callers must be careful to unlock
 * 			    the page if they get this return.  Returned by
 * 			    writepage(); 
 *
 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
 *  			unlocked it and the page might have been truncated.
 *  			The caller should back up to acquiring a new page and
 *  			trying again.  The aop will be taking reasonable
 *  			precautions not to livelock.  If the caller held a page
 *  			reference, it should drop it before retrying.  Returned
528
 *  			by readpage().
529
530
531
532
533
534
535
536
537
538
539
540
 *
 * address_space_operation functions return these large constants to indicate
 * special semantics to the caller.  These are much larger than the bytes in a
 * page to allow for functions that return the number of bytes operated on in a
 * given page.
 */

enum positive_aop_returns {
	/* From writepage(): page is still locked and should be treated as active. */
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	/* From readpage(): page was unlocked and may have been truncated; retry. */
	AOP_TRUNCATED_PAGE	= 0x80001,
};

541
#define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
Nick Piggin's avatar
Nick Piggin committed
542
#define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
543
544
545
#define AOP_FLAG_NOFS			0x0004 /* used by filesystem to direct
						* helper code (eg buffer layer)
						* to clear GFP_FS from alloc */
546

Linus Torvalds's avatar
Linus Torvalds committed
547
548
549
550
551
552
553
/*
 * oh the beauties of C type declarations.
 */
struct page;
struct address_space;
struct writeback_control;

Nick Piggin's avatar
Nick Piggin committed
554
555
556
557
558
559
560
561
562
563
564
565
/*
 * State for walking an array of iovec segments.  Every field is filled in
 * by iov_iter_init(); iov_iter_count() reads back @count.
 */
struct iov_iter {
	/* Segment array handed to iov_iter_init(); const, so not written through. */
	const struct iovec *iov;
	/* Number of segments, as passed to iov_iter_init(). */
	unsigned long nr_segs;
	/*
	 * Zeroed by iov_iter_init().  Presumably the byte offset into the
	 * current segment -- confirm against iov_iter_advance().
	 */
	size_t iov_offset;
	/*
	 * Seeded with count + written by iov_iter_init() before it calls
	 * iov_iter_advance(); returned as-is by iov_iter_count().
	 */
	size_t count;
};

/*
 * iov_iter helpers implemented outside this header.
 *
 * NOTE(review): judging by the names, the two copy routines move @bytes from
 * the user memory described by @i into @page at @offset, the _atomic variant
 * being the one usable where faults cannot be taken -- confirm against the
 * implementation.  iov_iter_advance() is what iov_iter_init() calls to step
 * the iterator past bytes that were already written.
 */
size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes);
size_t iov_iter_copy_from_user(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
566
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
Nick Piggin's avatar
Nick Piggin committed
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
size_t iov_iter_single_seg_count(struct iov_iter *i);

/*
 * Prepare @i to walk the @nr_segs segments at @iov.
 *
 * The byte total is seeded as @count + @written and the segment offset is
 * reset to zero; @written is then handed to iov_iter_advance() so the
 * iterator starts past the part that has already been dealt with.
 */
static inline void iov_iter_init(struct iov_iter *i,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count, size_t written)
{
	/* Total includes the already-written bytes until we advance below. */
	i->count = written + count;
	i->iov_offset = 0;
	i->nr_segs = nr_segs;
	i->iov = iov;

	/* Must come last: it operates on the fully initialised iterator. */
	iov_iter_advance(i, written);
}

static inline size_t iov_iter_count(struct iov_iter *i)
{
	return i->count;
}

586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
/*
 * "descriptor" for what we're up to with a read.
 * This allows us to use the same read code yet
 * have multiple different users of the data that
 * we read from a file.
 *
 * The simplest case just copies the data to user
 * mode.
 */
/*
 * Per-read bookkeeping passed to a read_actor_t callback.
 */
typedef struct {
	/* NOTE(review): presumably bytes handed to the consumer so far -- confirm. */
	size_t written;
	/* NOTE(review): presumably bytes still wanted by the consumer -- confirm. */
	size_t count;
	/* Destination for the data; which member is valid depends on the actor. */
	union {
		char __user *buf;	/* user-space buffer (the "copy to user" case) */
		void *data;		/* opaque pointer for other consumers */
	} arg;
	/* NOTE(review): presumably 0 or an error code set by the actor -- confirm. */
	int error;
} read_descriptor_t;

typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
		unsigned long, unsigned long);
Nick Piggin's avatar
Nick Piggin committed
607

Linus Torvalds's avatar
Linus Torvalds committed
608
609
610
611
612
613
614
struct address_space_operations {
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	int (*readpage)(struct file *, struct page *);

	/* Write back some dirty pages from this mapping. */
	int (*writepages)(struct address_space *, struct writeback_control *);

615
	/* Set a page dirty.  Return true if this dirtied it */
Linus Torvalds's avatar
Linus Torvalds committed
616
617
618
619
620
	int (*set_page_dirty)(struct page *page);

	int (*readpages)(struct file *filp, struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages);

621
622
623
624
625
626
627
	int (*write_begin)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata);
	int (*write_end)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata);

Linus Torvalds's avatar
Linus Torvalds committed
628
629
	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
	sector_t (*bmap)(struct address_space *, sector_t);
630
	void (*invalidatepage) (struct page *, unsigned long);
Al Viro's avatar
Al Viro committed
631
	int (*releasepage) (struct page *, gfp_t);
632
	void (*freepage)(struct page *);
Linus Torvalds's avatar
Linus Torvalds committed
633
634
	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs);
635
636
	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
						void **, unsigned long *);
637
638
639
640
	/*
	 * migrate the contents of a page to the specified target. If sync
	 * is false, it must not block.
	 */
641
	int (*migratepage) (struct address_space *,
642
			struct page *, struct page *, enum migrate_mode);
643
	int (*launder_page) (struct page *);
644
645
	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
					unsigned long);
646
	int (*error_remove_page)(struct address_space *, struct page *);
647
648

	/* swapfile support */
649
650
651
	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
				sector_t *span);
	void (*swap_deactivate)(struct file *file);
Linus Torvalds's avatar
Linus Torvalds committed
652
653
};

Jens Axboe's avatar
Jens Axboe committed
654
655
extern const struct address_space_operations empty_aops;

656
657
658
659
660
661
662
663
664
665
666
667
/*
 * pagecache_write_begin/pagecache_write_end must be used by general code
 * to write into the pagecache.
 */
int pagecache_write_begin(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata);

int pagecache_write_end(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata);

Linus Torvalds's avatar
Linus Torvalds committed
668
669
670
671
struct backing_dev_info;
struct address_space {
	struct inode		*host;		/* owner: inode, block_device */
	struct radix_tree_root	page_tree;	/* radix tree of all pages */
Nick Piggin's avatar
Nick Piggin committed
672
	spinlock_t		tree_lock;	/* and lock protecting it */
Linus Torvalds's avatar
Linus Torvalds committed
673
674
675
	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
676
	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
677
	/* Protected by tree_lock together with the radix tree */
Linus Torvalds's avatar
Linus Torvalds committed
678
679
	unsigned long		nrpages;	/* number of total pages */
	pgoff_t			writeback_index;/* writeback starts here */
680
	const struct address_space_operations *a_ops;	/* methods */
Linus Torvalds's avatar
Linus Torvalds committed
681
682
683
684
685
686
687
688
	unsigned long		flags;		/* error bits/gfp mask */
	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
	spinlock_t		private_lock;	/* for use by the address_space */
	struct list_head	private_list;	/* ditto */
	struct address_space	*assoc_mapping;	/* ditto */
} __attribute__((aligned(sizeof(long))));
	/*
	 * On most architectures that alignment is already the case; but
Lucas De Marchi's avatar
Lucas De Marchi committed
689
	 * must be enforced here for CRIS, to let the least significant bit
Linus Torvalds's avatar
Linus Torvalds committed
690
691
	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
	 */
692
struct request_queue;
Linus Torvalds's avatar
Linus Torvalds committed
693
694
695

struct block_device {
	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
696
	int			bd_openers;
Linus Torvalds's avatar
Linus Torvalds committed
697
	struct inode *		bd_inode;	/* will die */
698
	struct super_block *	bd_super;
699
	struct mutex		bd_mutex;	/* open/close mutex */
Linus Torvalds's avatar
Linus Torvalds committed
700
	struct list_head	bd_inodes;
701
	void *			bd_claiming;
Linus Torvalds's avatar
Linus Torvalds committed
702
703
	void *			bd_holder;
	int			bd_holders;
704
	bool			bd_write_holder;
705
#ifdef CONFIG_SYSFS
706
	struct list_head	bd_holder_disks;
707
#endif
Linus Torvalds's avatar
Linus Torvalds committed
708
709
710
711
712
713
714
	struct block_device *	bd_contains;
	unsigned		bd_block_size;
	struct hd_struct *	bd_part;
	/* number of times partitions within this device have been opened. */
	unsigned		bd_part_count;
	int			bd_invalidated;
	struct gendisk *	bd_disk;
715
	struct request_queue *  bd_queue;
Linus Torvalds's avatar
Linus Torvalds committed
716
717
718
719
720
721
722
723
	struct list_head	bd_list;
	/*
	 * Private data.  You must have bd_claim'ed the block_device
	 * to use this.  NOTE:  bd_claim allows an owner to claim
	 * the same device multiple times, the owner must take special
	 * care to not mess up bd_private for that case.
	 */
	unsigned long		bd_private;
724
725
726
727
728

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
729
	/* A semaphore that prevents I/O while block size is being changed */
730
	struct percpu_rw_semaphore	bd_block_size_semaphore;
Linus Torvalds's avatar
Linus Torvalds committed
731
732
733
734
735
736
737
738
};

/*
 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
 * radix trees
 */
#define PAGECACHE_TAG_DIRTY	0
#define PAGECACHE_TAG_WRITEBACK	1
#define PAGECACHE_TAG_TOWRITE	2
Linus Torvalds's avatar
Linus Torvalds committed
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773

int mapping_tagged(struct address_space *mapping, int tag);

/*
 * Tell whether any vma currently maps this address_space, i.e. whether
 * pages of the file might be visible in userspace.
 *
 * Returns 1 when either the i_mmap prio tree or the i_mmap_nonlinear
 * list is non-empty, 0 otherwise.
 */
static inline int mapping_mapped(struct address_space *mapping)
{
	if (!prio_tree_empty(&mapping->i_mmap))
		return 1;

	return !list_empty(&mapping->i_mmap_nonlinear);
}

/*
 * Tell whether pages of this file might have been modified through a
 * userspace mapping.
 *
 * i_mmap_writable counts every VM_SHARED vma: do_mmap_pgoff marks a vma
 * VM_SHARED when it is shared and the file was opened for writing, so a
 * vma that is read-only right now may still be mprotect()ed writable.
 *
 * Returns 1 if the count is non-zero, 0 otherwise.
 */
static inline int mapping_writably_mapped(struct address_space *mapping)
{
	return !!mapping->i_mmap_writable;
}

/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 *
 * On 32-bit SMP builds i_size_ordered_init() initialises the inode's
 * i_size_seqcount; everywhere else it expands to an empty statement.
 * The macro argument is parenthesized so that any pointer expression
 * (not just a plain identifier) can be passed safely.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
#define i_size_ordered_init(inode) seqcount_init(&(inode)->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif

Al Viro's avatar
Al Viro committed
774
775
776
struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))

777
778
779
780
781
782
783
784
785
#define IOP_FASTPERM	0x0001
#define IOP_LOOKUP	0x0002
#define IOP_NOFOLLOW	0x0004

/*
 * Keep mostly read-only and often accessed (especially for
 * the RCU path lookup and 'stat' data) fields at the beginning
 * of the 'struct inode'
 */
Linus Torvalds's avatar
Linus Torvalds committed
786
struct inode {
787
	umode_t			i_mode;
788
	unsigned short		i_opflags;
789
790
	kuid_t			i_uid;
	kgid_t			i_gid;
791
792
793
794
795
796
797
	unsigned int		i_flags;

#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

798
799
	const struct inode_operations	*i_op;
	struct super_block	*i_sb;
800
	struct address_space	*i_mapping;
801

802
803
804
#ifdef CONFIG_SECURITY
	void			*i_security;
#endif
805

806
807
	/* Stat data, not accessed from path walking */
	unsigned long		i_ino;
Miklos Szeredi's avatar
Miklos Szeredi committed
808
809
810
811
812
813
814
815
816
817
818
	/*
	 * Filesystems may only read i_nlink directly.  They shall use the
	 * following functions for modification:
	 *
	 *    (set|clear|inc|drop)_nlink
	 *    inode_(inc|dec)_link_count
	 */
	union {
		const unsigned int i_nlink;
		unsigned int __i_nlink;
	};
819
	dev_t			i_rdev;
820
	loff_t			i_size;
821
822
823
	struct timespec		i_atime;
	struct timespec		i_mtime;
	struct timespec		i_ctime;
824
825
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	unsigned short          i_bytes;
826
	unsigned int		i_blkbits;
827
828
829
830
831
832
833
834
835
	blkcnt_t		i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
	seqcount_t		i_size_seqcount;
#endif

	/* Misc */
	unsigned long		i_state;
	struct mutex		i_mutex;
836

837
838
	unsigned long		dirtied_when;	/* jiffies of first dirtying */

Linus Torvalds's avatar
Linus Torvalds committed
839
	struct hlist_node	i_hash;
Nick Piggin's avatar
Nick Piggin committed
840
841
	struct list_head	i_wb_list;	/* backing dev IO list */
	struct list_head	i_lru;		/* inode LRU list */
Linus Torvalds's avatar
Linus Torvalds committed
842
	struct list_head	i_sb_list;
Nick Piggin's avatar
Nick Piggin committed
843
	union {
844
		struct hlist_head	i_dentry;
Nick Piggin's avatar
Nick Piggin committed
845
846
		struct rcu_head		i_rcu;
	};
847
	u64			i_version;
848
	atomic_t		i_count;
Christoph Hellwig's avatar
Christoph Hellwig committed
849
	atomic_t		i_dio_count;
850
	atomic_t		i_writecount;
851
	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
Linus Torvalds's avatar
Linus Torvalds committed
852
853
854
855
856
857
	struct file_lock	*i_flock;
	struct address_space	i_data;
#ifdef CONFIG_QUOTA
	struct dquot		*i_dquot[MAXQUOTAS];
#endif
	struct list_head	i_devices;
858
859
	union {
		struct pipe_inode_info	*i_pipe;
860
		struct block_device	*i_bdev;
861
		struct cdev		*i_cdev;
862
	};
Linus Torvalds's avatar
Linus Torvalds committed
863
864
865

	__u32			i_generation;

866
867
#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
868
	struct hlist_head	i_fsnotify_marks;
Robert Love's avatar
Robert Love committed
869
870
#endif

871
#ifdef CONFIG_IMA
872
	atomic_t		i_readcount; /* struct files open RO */
873
#endif
874
	void			*i_private; /* fs or device private pointer */
Linus Torvalds's avatar
Linus Torvalds committed
875
876
};

Al Viro's avatar
Al Viro committed
877
878
879
880
881
/*
 * Report whether the inode's i_hash node is currently unhashed.
 * Returns whatever hlist_unhashed() returns for &inode->i_hash.
 */
static inline int inode_unhashed(struct inode *inode)
{
	struct hlist_node *hash_node = &inode->i_hash;

	return hlist_unhashed(hash_node);
}

882
883
884
885
886
887
888
889
890
/*
 * inode->i_mutex nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: quota file
 *
 * The locking order between these classes is
 * parent -> child -> normal -> xattr -> quota
 */
enum inode_i_mutex_lock_class
{
	I_MUTEX_NORMAL,
	I_MUTEX_PARENT,
	I_MUTEX_CHILD,
	I_MUTEX_XATTR,
	I_MUTEX_QUOTA
};

Linus Torvalds's avatar
Linus Torvalds committed
902
903
904
905
906
907
908
909
910
911
/*
 * NOTE: in a 32bit arch with a preemptable kernel and
 * an UP compile the i_size_read/write must be atomic
 * with respect to the local cpu (unlike with preempt disabled),
 * but they don't need to be atomic with respect to other cpus like in
 * true SMP (so they need to either locally disable irq around
 * the read or for example on x86 they can be still implemented as a
 * cmpxchg8b without the need of the lock prefix). For SMP compiles
 * and 64bit archs it makes no difference if preempt is enabled or not.
 */
912
static inline loff_t i_size_read(const struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	loff_t i_size;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&inode->i_size_seqcount);
		i_size = inode->i_size;
	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
	return i_size;
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
	loff_t i_size;

	preempt_disable();
	i_size = inode->i_size;
	preempt_enable();
	return i_size;
#else
	return inode->i_size;
#endif
}

935
936
937
938
939
/*
 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
 */
Linus Torvalds's avatar
Linus Torvalds committed
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
/*
 * Store @i_size into inode->i_size, using the same per-configuration
 * protection scheme that i_size_read() relies on.
 */
static inline void i_size_write(struct inode *inode, loff_t i_size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/* 32-bit SMP: perform the store inside a seqcount write section */
	write_seqcount_begin(&inode->i_size_seqcount);
	inode->i_size = i_size;
	write_seqcount_end(&inode->i_size_seqcount);
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
	/* 32-bit preemptible non-SMP: keep the store from being preempted */
	preempt_disable();
	inode->i_size = i_size;
	preempt_enable();
#else
	/* all other configurations: a plain store */
	inode->i_size = i_size;
#endif
}

955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
/* Helper functions so that in most cases filesystems will
 * not need to deal directly with kuid_t and kgid_t and can
 * instead deal with the raw numeric values that are stored
 * in the filesystem.
 */
static inline uid_t i_uid_read(const struct inode *inode)
{
	return from_kuid(&init_user_ns, inode->i_uid);
}

static inline gid_t i_gid_read(const struct inode *inode)
{
	return from_kgid(&init_user_ns, inode->i_gid);
}

/*
 * Set the inode's owner from a raw uid_t: @uid is turned into a kuid_t
 * with make_kuid() relative to init_user_ns and stored in inode->i_uid.
 */
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
	kuid_t owner = make_kuid(&init_user_ns, uid);

	inode->i_uid = owner;
}

/*
 * Set the inode's group from a raw gid_t: @gid is turned into a kgid_t
 * with make_kgid() relative to init_user_ns and stored in inode->i_gid.
 */
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
	kgid_t group = make_kgid(&init_user_ns, gid);

	inode->i_gid = group;
}

980
static inline unsigned iminor(const struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
981
982
983
984
{
	return MINOR(inode->i_rdev);
}

985
static inline unsigned imajor(const struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
986
987
988
989
990
991
992
993
{
	return MAJOR(inode->i_rdev);
}

extern struct block_device *I_BDEV(struct inode *inode);

struct fown_struct {
	rwlock_t lock;          /* protects pid, uid, euid fields */
994
995
	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
996
	kuid_t uid, euid;	/* uid/euid of process setting the owner */
Linus Torvalds's avatar
Linus Torvalds committed
997
998
999
1000
1001
1002
1003
	int signum;		/* posix.1b rt signal to be delivered on IO */
};

/*
 * Track a single file's readahead state
 */
struct file_ra_state {
1004
1005
1006
	pgoff_t start;			/* where readahead started */
	unsigned int size;		/* # of readahead pages */
	unsigned int async_size;	/* do asynchronous readahead when
1007
					   there are only # of pages ahead */
1008

1009
	unsigned int ra_pages;		/* Maximum readahead window */
1010
	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
1011
	loff_t prev_pos;		/* Cache last read() position */
Linus Torvalds's avatar
Linus Torvalds committed
1012
1013
};

1014
1015
1016
1017
1018
/*
 * Check if @index falls in the readahead window, i.e. inside the
 * half-open range [ra->start, ra->start + ra->size).
 *
 * Returns non-zero if it does, 0 otherwise.
 */
static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
{
	return (index >= ra->start &&
		index <  ra->start + ra->size);
}

1023
1024
1025
#define FILE_MNT_WRITE_TAKEN	1
#define FILE_MNT_WRITE_RELEASED	2

Linus Torvalds's avatar
Linus Torvalds committed
1026
struct file {
1027
1028
1029
1030
1031
1032
1033
1034
	/*
	 * fu_list becomes invalid after file_free is called and queued via
	 * fu_rcuhead for RCU freeing
	 */
	union {
		struct list_head	fu_list;
		struct rcu_head 	fu_rcuhead;
	} f_u;
1035
1036
1037
	struct path		f_path;
#define f_dentry	f_path.dentry
#define f_vfsmnt	f_path.mnt
1038
	const struct file_operations	*f_op;
1039
1040
1041
1042
1043
1044

	/*
	 * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t		f_lock;
Nick Piggin's avatar
Nick Piggin committed
1045
1046
1047
#ifdef CONFIG_SMP
	int			f_sb_list_cpu;
#endif
Al Viro's avatar
Al Viro committed
1048
	atomic_long_t		f_count;
Linus Torvalds's avatar
Linus Torvalds committed
1049
	unsigned int 		f_flags;
1050
	fmode_t			f_mode;
Linus Torvalds's avatar
Linus Torvalds committed
1051
1052
	loff_t			f_pos;
	struct fown_struct	f_owner;
1053
	const struct cred	*f_cred;
Linus Torvalds's avatar
Linus Torvalds committed
1054
1055
	struct file_ra_state	f_ra;

1056
	u64			f_version;
1057
#ifdef CONFIG_SECURITY
Linus Torvalds's avatar
Linus Torvalds committed
1058
	void			*f_security;
1059
#endif
Linus Torvalds's avatar
Linus Torvalds committed
1060
1061
1062
1063
1064
1065
	/* needed for tty driver, and maybe others */
	void			*private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head	f_ep_links;
Jason Baron's avatar
Jason Baron committed
1066
	struct list_head	f_tfile_llink;
Linus Torvalds's avatar
Linus Torvalds committed
1067
1068
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space	*f_mapping;
1069
1070
1071
#ifdef CONFIG_DEBUG_WRITECOUNT
	unsigned long f_mnt_write_state;
#endif
Linus Torvalds's avatar
Linus Torvalds committed
1072
1073
};

1074
1075
1076
1077
1078
1079
1080
/*
 * Variable-length file handle: a fixed header followed by handle_bytes
 * bytes of identifier data. The trailing data is declared as a C99
 * flexible array member rather than a GNU zero-length array; the header
 * size and the offset of f_handle are unchanged.
 */
struct file_handle {
	__u32 handle_bytes;
	int handle_type;
	/* file identifier */
	unsigned char f_handle[];
};

Al Viro's avatar
Al Viro committed
1081
/*
 * Reference-count helpers operating on file->f_count:
 *  get_file()    - increment the count
 *  fput_atomic() - add -1 via atomic_long_add_unless(..., -1, 1)
 *                  (presumably skipped when the count is 1 - confirm
 *                  against atomic_long_add_unless())
 *  file_count()  - read the current count
 */
#define get_file(x)	atomic_long_inc(&(x)->f_count)
#define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
#define file_count(x)	atomic_long_read(&(x)->f_count)
Linus Torvalds's avatar
Linus Torvalds committed
1084

1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
#ifdef CONFIG_DEBUG_WRITECOUNT
/*
 * Debug bookkeeping: record that write access was taken for @f.
 * Warns if any write-state bit is already set.
 */
static inline void file_take_write(struct file *f)
{
	WARN_ON(f->f_mnt_write_state);
	f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
}
/* Debug bookkeeping: add the RELEASED bit to @f's write state. */
static inline void file_release_write(struct file *f)
{
	f->f_mnt_write_state = f->f_mnt_write_state | FILE_MNT_WRITE_RELEASED;
}
/* Debug bookkeeping: clear every write-state bit of @f. */
static inline void file_reset_write(struct file *f)
{
	f->f_mnt_write_state = 0UL;
}
/*
 * Debug check: the TAKEN and RELEASED bits are expected to be set
 * together or not at all. Warn when exactly one of them is set.
 */
static inline void file_check_state(struct file *f)
{
	const unsigned long state = f->f_mnt_write_state;

	WARN_ON(state == FILE_MNT_WRITE_TAKEN);
	WARN_ON(state == FILE_MNT_WRITE_RELEASED);
}
/*
 * Debug check that write access was taken (and not yet released) for @f.
 *
 * Returns 0 when the write state is exactly FILE_MNT_WRITE_TAKEN;
 * otherwise logs a warning and returns -EINVAL.
 */
static inline int file_check_writeable(struct file *f)
{
	if (f->f_mnt_write_state != FILE_MNT_WRITE_TAKEN) {
		printk(KERN_WARNING "writeable file with no "
				    "mnt_want_write()\n");
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}
#else /* !CONFIG_DEBUG_WRITECOUNT */
/* Without CONFIG_DEBUG_WRITECOUNT the write-state helpers do nothing. */
static inline void file_take_write(struct file *filp)
{
}

static inline void file_release_write(struct file *filp)
{
}

static inline void file_reset_write(struct file *filp)
{
}

static inline void file_check_state(struct file *filp)
{
}

/* Always reports success (0) when write-state debugging is disabled. */
static inline int file_check_writeable(struct file *filp)
{
	return 0;
}
#endif /* CONFIG_DEBUG_WRITECOUNT */

Linus Torvalds's avatar
Linus Torvalds committed
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
#define	MAX_NON_LFS	((1UL<<31) - 1)

/* Page cache limit. The filesystems should put that into their s_maxbytes 
   limits, otherwise bad things can happen in VM. */ 
#if BITS_PER_LONG==32
#define MAX_LFS_FILESIZE	(((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 
#elif BITS_PER_LONG==64
#define MAX_LFS_FILESIZE 	0x7fffffffffffffffUL
#endif

#define FL_POSIX	1
#define FL_FLOCK	2
#define FL_ACCESS	8	/* not trying to lock, just looking */
1141
#define FL_EXISTS	16	/* when unlocking, test for existence */
Linus Torvalds's avatar
Linus Torvalds committed
1142
#define FL_LEASE	32	/* lease held on this file */
1143
#define FL_CLOSE	64	/* unlock on close */
Linus Torvalds's avatar
Linus Torvalds committed
1144
#define FL_SLEEP	128	/* A blocking lock */
1145
1146
#define FL_DOWNGRADE_PENDING	256 /* Lease is being downgraded */
#define FL_UNLOCK_PENDING	512 /* Lease is being broken */
Linus Torvalds's avatar
Linus Torvalds committed
1147

1148
1149
1150
1151
1152
1153
/*
 * Special return value from posix_lock_file() and vfs_lock_file() for
 * asynchronous locking.
 */
#define FILE_LOCK_DEFERRED 1

Linus Torvalds's avatar
Linus Torvalds committed
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
/*
 * The POSIX file lock owner is determined by
 * the "struct files_struct" in the thread group
 * (or NULL for no owner - BSD locks).
 *
 * Lockd stuffs a "host" pointer into this.
 */
typedef struct files_struct *fl_owner_t;

/*
 * Callback table a file_lock can point to through fl_ops
 * ("Callbacks for filesystems").
 * NOTE(review): judging by the names, fl_copy_lock is invoked when a lock
 * is copied and fl_release_private when private lock data is released -
 * confirm against fs/locks.c.
 */
struct file_lock_operations {
	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
	void (*fl_release_private)(struct file_lock *);
};

/*
 * Callback table a file_lock can point to through fl_lmops
 * ("Callbacks for lockmanagers").
 */
struct lock_manager_operations {
	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
	void (*lm_notify)(struct file_lock *);	/* unblock callback */
	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
	void (*lm_break)(struct file_lock *);
	int (*lm_change)(struct file_lock **, int);
};

1176
1177
1178
1179
/*
 * Handle taken by locks_start_grace() and locks_end_grace(); it carries
 * only a list linkage.
 */
struct lock_manager {
	struct list_head list;	/* list linkage for this manager */
};

1180
1181
/* Grace-period interface for lock managers. */
struct net;
void locks_start_grace(struct net *, struct lock_manager *);
void locks_end_grace(struct lock_manager *);
int locks_in_grace(struct net *);
1184

Linus Torvalds's avatar
Linus Torvalds committed
1185
1186
1187
1188
1189
1190
1191
1192
/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>

struct file_lock {
	struct file_lock *fl_next;	/* singly linked list for this inode  */
	struct list_head fl_link;	/* doubly linked list of all locks */
	struct list_head fl_block;	/* circular list of blocked processes */
	fl_owner_t fl_owner;
1193
	unsigned int fl_flags;
1194
	unsigned char fl_type;
Linus Torvalds's avatar
Linus Torvalds committed
1195
	unsigned int fl_pid;
1196
	struct pid *fl_nspid;
Linus Torvalds's avatar
Linus Torvalds committed
1197
1198
1199
1200
1201
1202
	wait_queue_head_t fl_wait;
	struct file *fl_file;
	loff_t fl_start;
	loff_t fl_end;

	struct fasync_struct *	fl_fasync; /* for lease break notifications */
1203
1204
1205
	/* for lease breaks: */
	unsigned long fl_break_time;
	unsigned long fl_downgrade_time;
Linus Torvalds's avatar
Linus Torvalds committed
1206

1207
	const struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
1208
	const struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
Linus Torvalds's avatar
Linus Torvalds committed
1209
1210
	union {
		struct nfs_lock_info	nfs_fl;
1211
		struct nfs4_lock_info	nfs4_fl;
David Howells's avatar
David Howells committed
1212
1213
1214
1215
		struct {
			struct list_head link;	/* link in AFS vnode's pending_locks list */
			int state;		/* state of grant or error if -ve */
		} afs;
Linus Torvalds's avatar
Linus Torvalds committed
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
	} fl_u;
};

/*
 * The following constant reflects the upper bound of the file/locking space.
 *
 * INT_LIMIT(x) yields the largest positive value of the signed integer
 * type x. The bit pattern is built in unsigned arithmetic and then cast,
 * so no 1 is ever shifted into the sign bit of a signed type (which would
 * be undefined behaviour).
 */
#ifndef OFFSET_MAX
#define INT_LIMIT(x)	((x)(((unsigned long long)1 << (sizeof(x)*8 - 1)) - 1))
#define OFFSET_MAX	INT_LIMIT(loff_t)
#define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
#endif

#include <linux/fcntl.h>

1228
1229
1230
extern void send_sigio(struct fown_struct *fown, int fd, int band);

#ifdef CONFIG_FILE_LOCKING
Linus Torvalds's avatar
Linus Torvalds committed
1231
extern int fcntl_getlk(struct file *, struct flock __user *);
1232
1233
extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
			struct flock __user *);
Linus Torvalds's avatar
Linus Torvalds committed
1234
1235
1236

#if BITS_PER_LONG == 32
extern int fcntl_getlk64(struct file *, struct flock64 __user *);
1237
1238
extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
			struct flock64 __user *);
Linus Torvalds's avatar
Linus Torvalds committed
1239
1240
1241
1242
1243
1244
#endif

extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);

/* fs/locks.c */
1245
void locks_free_lock(struct file_lock *fl);
Linus Torvalds's avatar
Linus Torvalds committed
1246
extern void locks_init_lock(struct file_lock *);
1247
extern struct file_lock * locks_alloc_lock(void);
Linus Torvalds's avatar
Linus Torvalds committed
1248
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
1249
extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
Linus Torvalds's avatar
Linus Torvalds committed
1250
1251
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
1252
extern void locks_release_private(struct file_lock *);
1253
extern void posix_test_lock(struct file *, struct file_lock *);
1254
extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
Linus Torvalds's avatar
Linus Torvalds committed
1255
extern int posix_lock_file_wait(struct file *, struct file_lock *);
J. Bruce Fields's avatar
J. Bruce Fields committed
1256
extern int posix_unblock_lock(struct file *, struct file_lock *);
1257
extern int vfs_test_lock(struct file *, struct file_lock *);
1258
extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
Marc Eshel's avatar
Marc Eshel committed
1259
extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
Linus Torvalds's avatar
Linus Torvalds committed
1260
1261
1262
extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
extern int __break_lease(struct inode *inode, unsigned int flags);
extern void lease_get_mtime(struct inode *, struct timespec *time);
1263
extern int generic_setlease(struct file *, long, struct file_lock **);
1264
extern int vfs_setlease(struct file *, long, struct file_lock **);
Linus Torvalds's avatar
Linus Torvalds committed
1265
1266
1267
extern int lease_modify(struct file_lock **, int);
extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
1268
extern void locks_delete_block(struct file_lock *waiter);
1269
1270
extern void lock_flocks(void);
extern void unlock_flocks(void);
1271
#else /* !CONFIG_FILE_LOCKING */
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
/* !CONFIG_FILE_LOCKING stub: querying a lock always fails with -EINVAL. */
static inline int fcntl_getlk(struct file *file, struct flock __user *user)
{
	return -EINVAL;
}

/* !CONFIG_FILE_LOCKING stub: setting a lock always fails with -EACCES. */
static inline int fcntl_setlk(unsigned int fd, struct file *file,
			      unsigned int cmd, struct flock __user *user)
{
	return -EACCES;
}

1283
#if BITS_PER_LONG == 32
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
/* !CONFIG_FILE_LOCKING stub (flock64 variant): always fails with -EINVAL. */
static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
{
	return -EINVAL;
}

/* !CONFIG_FILE_LOCKING stub (flock64 variant): always fails with -EACCES. */
static inline int fcntl_setlk64(unsigned int fd, struct file *file,
				unsigned int cmd, struct flock64 __user *user)
{
	return -EACCES;
}
1294
#endif
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
/*
 * !CONFIG_FILE_LOCKING stub: leases are not available, so a request to
 * set one must fail rather than pretend to succeed. Returns -EINVAL,
 * matching the generic_setlease()/vfs_setlease() stubs.
 */
static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
	return -EINVAL;
}

/*
 * !CONFIG_FILE_LOCKING stub: no lease can ever be held, so report
 * F_UNLCK. Returning 0 would be read as F_RDLCK, i.e. "a read lease
 * is held".
 */
static inline int fcntl_getlease(struct file *filp)
{
	return F_UNLCK;
}

/* !CONFIG_FILE_LOCKING stub: nothing to initialise. */
static inline void locks_init_lock(struct file_lock *fl)
{
}

/*
 * !CONFIG_FILE_LOCKING stub: copying a lock is a no-op.
 * The source lock is const-qualified to match the real prototype
 * (struct file_lock *, const struct file_lock *), so callers holding a
 * const pointer build cleanly in both configurations.
 */
static inline void __locks_copy_lock(struct file_lock *new,
				     const struct file_lock *fl)
{
}

/* !CONFIG_FILE_LOCKING stub: copying a lock is a no-op. */
static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
}

/* !CONFIG_FILE_LOCKING stub: there are no POSIX locks to remove. */
static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
}

/* !CONFIG_FILE_LOCKING stub: there are no flock locks to remove. */
static inline void locks_remove_flock(struct file *filp)
{
}

/* !CONFIG_FILE_LOCKING stub: leaves @fl untouched. */
static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
{
}

/* !CONFIG_FILE_LOCKING stub: always fails with -ENOLCK. */
static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
				  struct file_lock *conflock)
{
	return -ENOLCK;
}

/* !CONFIG_FILE_LOCKING stub: always fails with -ENOLCK. */
static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
{
	return -ENOLCK;
}

/* !CONFIG_FILE_LOCKING stub: always returns -ENOENT. */
static inline int posix_unblock_lock(struct file *filp,
				     struct file_lock *waiter)
{
	return -ENOENT;
}

/* !CONFIG_FILE_LOCKING stub: always returns 0 and leaves @fl untouched. */
static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}

/* !CONFIG_FILE_LOCKING stub: always fails with -ENOLCK. */
static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
				struct file_lock *fl, struct file_lock *conf)
{
	return -ENOLCK;
}

/* !CONFIG_FILE_LOCKING stub: always returns 0. */
static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}

/* !CONFIG_FILE_LOCKING stub: always fails with -ENOLCK. */
static inline int flock_lock_file_wait(struct file *filp,
				       struct file_lock *request)
{
	return -ENOLCK;
}

/* !CONFIG_FILE_LOCKING stub: always returns 0. */
static inline int __break_lease(struct inode *inode, unsigned int mode)
{
	return 0;
}

/* !CONFIG_FILE_LOCKING stub: leaves @time untouched. */
static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
{
}

/* !CONFIG_FILE_LOCKING stub: always fails with -EINVAL. */
static inline int generic_setlease(struct file *filp, long arg,
				    struct file_lock **flp)
{
	return -EINVAL;
}

/* !CONFIG_FILE_LOCKING stub: always fails with -EINVAL. */
static inline int vfs_setlease(struct file *filp, long arg,
			       struct file_lock **lease)
{
	return -EINVAL;
}

/* !CONFIG_FILE_LOCKING stub: always fails with -EINVAL. */
static inline int lease_modify(struct file_lock **before, int arg)
{
	return -EINVAL;
}

/* !CONFIG_FILE_LOCKING stub: always returns 1. */
static inline int lock_may_read(struct inode *inode, loff_t start,
				unsigned long len)
{
	return 1;
}

/* !CONFIG_FILE_LOCKING stub: always returns 1. */
static inline int lock_may_write(struct inode *inode, loff_t start,
				 unsigned long len)
{
	return 1;
}

1413
1414
1415
1416
/* !CONFIG_FILE_LOCKING stub: does nothing. */
static inline void locks_delete_block(struct file_lock *waiter)
{
}

1417
1418
1419
1420
1421
1422
1423
1424
/* !CONFIG_FILE_LOCKING stub: does nothing. */
static inline void lock_flocks(void)
{
}

/* !CONFIG_FILE_LOCKING stub: does nothing. */
static inline void unlock_flocks(void)
{
}

1425
1426
#endif /* !CONFIG_FILE_LOCKING */

Linus Torvalds's avatar
Linus Torvalds committed
1427
1428

struct fasync_struct {
1429
1430
1431
1432
1433
1434
	spinlock_t		fa_lock;
	int			magic;
	int			fa_fd;
	struct fasync_struct	*fa_next; /* singly linked list */
	struct file		*fa_file;
	struct rcu_head		fa_rcu;
Linus Torvalds's avatar
Linus Torvalds committed
1435
1436
1437
1438
1439
1440
};

#define FASYNC_MAGIC 0x4601

/* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
extern int fasync_remove_entry(struct file *, struct fasync_struct **);
extern struct fasync_struct *fasync_alloc(void);
extern void fasync_free(struct fasync_struct *);

Linus Torvalds's avatar
Linus Torvalds committed
1446
1447