file.c 30.9 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
 * fs/sysfs/file.c - sysfs regular (text) file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * This file is released under the GPLv2.
 *
 * Please see Documentation/filesystems/sysfs.txt for more information.
 */

#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/kallsyms.h>
#include <linux/slab.h>
#include <linux/fsnotify.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/mm.h>
#include <linux/string.h>

#include "sysfs.h"

Tejun Heo's avatar
Tejun Heo committed
29
/*
 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
 * for each sysfs_dirent with one or more open files.
 *
 * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open is
 * protected by sysfs_open_dirent_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * sysfs_open_file.  sysfs_open_files are chained at
 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
 */
Jiri Slaby's avatar
Jiri Slaby committed
40
/* Protects sysfs_dirent->s_attr.open. */
static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
/* Protects the sysfs_open_dirent->files lists. */
static DEFINE_MUTEX(sysfs_open_file_mutex);
Tejun Heo's avatar
Tejun Heo committed
42
43
44

struct sysfs_open_dirent {
	atomic_t		refcnt;
45
46
	atomic_t		event;
	wait_queue_head_t	poll;
47
	struct list_head	files; /* goes through sysfs_open_file.list */
Tejun Heo's avatar
Tejun Heo committed
48
49
};

50
51
52
53
54
static struct sysfs_open_file *sysfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

55
56
57
58
59
60
61
62
63
64
65
/*
 * Return the kernfs_ops attached to @sd.  The caller must hold an active
 * reference on @sd; unless lockdep is being ignored for this dirent, that
 * requirement is asserted before the ops pointer is handed out.
 */
static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
{
	if (sysfs_ignore_lockdep(sd))
		return sd->s_attr.ops;

	lockdep_assert_held(sd);
	return sd->s_attr.ops;
}

Tejun Heo's avatar
Tejun Heo committed
66
67
68
69
70
71
/*
 * Determine ktype->sysfs_ops for the given sysfs_dirent.  This function
 * must be called while holding an active reference.
 */
static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
{
72
	struct kobject *kobj = sd->s_parent->priv;
Tejun Heo's avatar
Tejun Heo committed
73

74
75
	if (!sysfs_ignore_lockdep(sd))
		lockdep_assert_held(sd);
Tejun Heo's avatar
Tejun Heo committed
76
77
78
	return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
}

79
80
81
82
/*
 * Reads on sysfs are handled through seq_file, which takes care of hairy
 * details like buffering and seeking.  The following function pipes
 * sysfs_ops->show() result through seq_file.
Linus Torvalds's avatar
Linus Torvalds committed
83
 */
84
static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
Linus Torvalds's avatar
Linus Torvalds committed
85
{
86
	struct sysfs_open_file *of = sf->private;
87
	struct kobject *kobj = of->sd->s_parent->priv;
88
	const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
Linus Torvalds's avatar
Linus Torvalds committed
89
	ssize_t count;
90
	char *buf;
Linus Torvalds's avatar
Linus Torvalds committed
91

92
93
94
95
96
97
	/* acquire buffer and ensure that it's >= PAGE_SIZE */
	count = seq_get_buf(sf, &buf);
	if (count < PAGE_SIZE) {
		seq_commit(sf, -1);
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
98

99
	/*
100
101
	 * Invoke show().  Control may reach here via seq file lseek even
	 * if @ops->show() isn't implemented.
102
	 */
103
	if (ops->show) {
104
		count = ops->show(kobj, of->sd->priv, buf);
105
106
107
		if (count < 0)
			return count;
	}
108

109
110
111
112
	/*
	 * The code works fine with PAGE_SIZE return but it's likely to
	 * indicate truncated result or overflow in normal use cases.
	 */
113
114
115
116
117
118
	if (count >= (ssize_t)PAGE_SIZE) {
		print_symbol("fill_read_buffer: %s returned bad count\n",
			(unsigned long)ops->show);
		/* Try to struggle along */
		count = PAGE_SIZE - 1;
	}
119
120
	seq_commit(sf, count);
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
121
122
}

123
124
/*
 * kernfs read callback for bin sysfs files.
 *
 * If the inode has a non-zero size, reads starting at or beyond it return
 * 0 and reads crossing it are trimmed to end at it.  Returns 0 for
 * zero-length requests, -EIO if the bin_attribute has no ->read(), and
 * otherwise whatever ->read() returns.
 */
static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf,
				 size_t count, loff_t pos)
{
	struct bin_attribute *battr = of->sd->priv;
	struct kobject *kobj = of->sd->s_parent->priv;
	loff_t size = file_inode(of->file)->i_size;

	if (!count)
		return 0;

	if (size) {
		/*
		 * Nothing left at or past EOF.  Testing with >= keeps a
		 * zero-length request from reaching ->read() when
		 * pos == size, the same way sysfs_kf_bin_write() bails out.
		 */
		if (pos >= size)
			return 0;
		if (pos + count > size)
			count = size - pos;
	}

	if (!battr->read)
		return -EIO;

	return battr->read(of->file, kobj, battr, buf, pos, count);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct sysfs_open_file *of = sf->private;
149
	const struct kernfs_ops *ops;
150
151
152
153
154
155
156
157
158

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd))
		return ERR_PTR(-ENODEV);

159
160
161
162
163
164
165
166
167
168
	ops = kernfs_ops(of->sd);
	if (ops->seq_start) {
		return ops->seq_start(sf, ppos);
	} else {
		/*
		 * The same behavior and code as single_open().  Returns
		 * !NULL if pos is at the beginning; otherwise, NULL.
		 */
		return NULL + !*ppos;
	}
169
170
171
172
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
173
174
175
176
177
178
179
180
181
182
183
184
185
	struct sysfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->sd);

	if (ops->seq_next) {
		return ops->seq_next(sf, v, ppos);
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
186
187
188
189
190
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct sysfs_open_file *of = sf->private;
191
192
193
194
	const struct kernfs_ops *ops = kernfs_ops(of->sd);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
195
196
197
198
199
200
201
202
203
204
205

	sysfs_put_active(of->sd);
	mutex_unlock(&of->mutex);
}

/*
 * seq_file ->show().  Records the current event count of the open dirent
 * in @of->event, which kernfs_file_poll() compares against, and then
 * hands off to the node's ->seq_show().
 */
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct sysfs_open_file *of = sf->private;

	of->event = atomic_read(&of->sd->s_attr.open->event);

	return of->sd->s_attr.ops->seq_show(sf, v);
}

/* seq_file iterator installed by kernfs_file_open() for ->seq_show() nodes */
static const struct seq_operations kernfs_seq_ops = {
	.start	= kernfs_seq_start,
	.next	= kernfs_seq_next,
	.stop	= kernfs_seq_stop,
	.show	= kernfs_seq_show,
};

/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in read(2) call should be passed to the read callback making
 * it difficult to use seq_file.  Implement simplistic custom buffering for
 * bin files.
 *
 * At most PAGE_SIZE bytes are transferred per call through a temporary
 * kernel buffer.  On success the number of bytes copied to @user_buf is
 * returned and *@ppos is advanced by it.  Errors: -ENOMEM if the buffer
 * can't be allocated, -ENODEV if no active reference can be obtained,
 * -EINVAL if the node has no ->read(), -EFAULT if copying to userland
 * fails, or the negative value returned by ->read().
 */
static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
				       char __user *user_buf, size_t count,
				       loff_t *ppos)
{
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	ops = kernfs_ops(of->sd);
	if (ops->read)
		len = ops->read(of, buf, len, *ppos);
	else
		len = -EINVAL;

	sysfs_put_active(of->sd);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	/* copy out only after the active ref and mutex have been dropped */
	if (copy_to_user(user_buf, buf, len)) {
		len = -EFAULT;
		goto out_free;
	}

	*ppos += len;

 out_free:
	kfree(buf);
	return len;
}

/**
 * kernfs_file_read - kernfs vfs read callback
 * @file: file pointer
 * @user_buf: userland buffer to copy the data into
 * @count: number of bytes
 * @ppos: starting offset
 *
 * Nodes flagged SYSFS_FLAG_HAS_SEQ_SHOW are read through seq_file; all
 * others go through kernfs_file_direct_read().
 */
static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct sysfs_open_file *of = sysfs_of(file);

	if (of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW)
		return seq_read(file, user_buf, count, ppos);
	else
		return kernfs_file_direct_read(of, user_buf, count, ppos);
}

287
288
289
/*
 * kernfs write callback for regular sysfs files.  Zero-length writes
 * return 0 without calling ->store(); everything else is passed to the
 * kobject's sysfs_ops->store() and its return value is handed back.
 * @pos is not used.
 */
static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf,
			      size_t count, loff_t pos)
{
	const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
	struct kobject *kobj = of->sd->s_parent->priv;

	if (!count)
		return 0;

	return ops->store(kobj, of->sd->priv, buf, count);
}
299

300
301
302
303
304
305
306
/*
 * kernfs write callback for bin sysfs files.
 *
 * If the inode has a non-zero size, writes starting at or beyond it
 * return 0 and writes crossing it are trimmed to end at it.  Returns 0
 * for (resulting) zero-length writes, -EIO if the bin_attribute has no
 * ->write(), and otherwise whatever ->write() returns.
 */
static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf,
				  size_t count, loff_t pos)
{
	struct bin_attribute *battr = of->sd->priv;
	struct kobject *kobj = of->sd->s_parent->priv;
	loff_t size = file_inode(of->file)->i_size;

	if (size) {
		if (size <= pos)
			return 0;
		count = min_t(ssize_t, count, size - pos);
	}
	if (!count)
		return 0;

	if (!battr->write)
		return -EIO;

	return battr->write(of->file, kobj, battr, buf, pos, count);
}

/**
323
 * kernfs_file_write - kernfs vfs write callback
Tejun Heo's avatar
Tejun Heo committed
324
325
326
327
328
 * @file: file pointer
 * @user_buf: data to write
 * @count: number of bytes
 * @ppos: starting offset
 *
329
330
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
Linus Torvalds's avatar
Linus Torvalds committed
331
 *
Tejun Heo's avatar
Tejun Heo committed
332
333
334
335
336
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them. We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the the value you're changing, then write entire buffer
 * back.
Linus Torvalds's avatar
Linus Torvalds committed
337
 */
338
339
static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
				 size_t count, loff_t *ppos)
Linus Torvalds's avatar
Linus Torvalds committed
340
{
341
	struct sysfs_open_file *of = sysfs_of(file);
342
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
343
	const struct kernfs_ops *ops;
Tejun Heo's avatar
Tejun Heo committed
344
	char *buf;
Linus Torvalds's avatar
Linus Torvalds committed
345

Tejun Heo's avatar
Tejun Heo committed
346
347
348
349
350
351
352
353
354
355
	buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, user_buf, len)) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

356
357
358
359
360
361
362
363
364
365
366
	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

367
368
369
	ops = kernfs_ops(of->sd);
	if (ops->write)
		len = ops->write(of, buf, len, *ppos);
370
	else
371
		len = -EINVAL;
372
373
374
375

	sysfs_put_active(of->sd);
	mutex_unlock(&of->mutex);

Linus Torvalds's avatar
Linus Torvalds committed
376
377
	if (len > 0)
		*ppos += len;
Tejun Heo's avatar
Tejun Heo committed
378
379
out_free:
	kfree(buf);
Linus Torvalds's avatar
Linus Torvalds committed
380
381
382
	return len;
}

383
384
385
386
387
388
389
390
391
392
393
394
395
/*
 * kernfs mmap callback for bin sysfs files.  Forwards to the
 * bin_attribute's ->mmap(), or fails with -ENODEV if it has none.
 */
static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
			     struct vm_area_struct *vma)
{
	struct kobject *kobj = of->sd->s_parent->priv;
	struct bin_attribute *battr = of->sd->priv;

	if (battr->mmap)
		return battr->mmap(of->file, kobj, battr, vma);

	return -ENODEV;
}

static void kernfs_vma_open(struct vm_area_struct *vma)
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);

	if (!of->vm_ops)
		return;

	if (!sysfs_get_active(of->sd))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	sysfs_put_active(of->sd);
}

412
static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!sysfs_get_active(of->sd))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vma, vmf);

	sysfs_put_active(of->sd);
	return ret;
}

432
433
static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
				   struct vm_fault *vmf)
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!sysfs_get_active(of->sd))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vma, vmf);
	else
		file_update_time(file);

	sysfs_put_active(of->sd);
	return ret;
}

455
456
static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!sysfs_get_active(of->sd))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	sysfs_put_active(of->sd);
	return ret;
}

#ifdef CONFIG_NUMA
477
478
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!sysfs_get_active(of->sd))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	sysfs_put_active(of->sd);
	return ret;
}

498
499
static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!sysfs_get_active(of->sd))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	sysfs_put_active(of->sd);
	return pol;
}

519
520
521
static int kernfs_vma_migrate(struct vm_area_struct *vma,
			      const nodemask_t *from, const nodemask_t *to,
			      unsigned long flags)
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!sysfs_get_active(of->sd))
		return 0;

	ret = 0;
	if (of->vm_ops->migrate)
		ret = of->vm_ops->migrate(vma, from, to, flags);

	sysfs_put_active(of->sd);
	return ret;
}
#endif

542
543
544
545
546
/*
 * vm_ops installed on mappings by kernfs_file_mmap().  Each entry wraps
 * the corresponding op of the vm_ops saved in sysfs_open_file->vm_ops.
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
	.migrate	= kernfs_vma_migrate,
#endif
};

554
static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
555
556
{
	struct sysfs_open_file *of = sysfs_of(file);
557
	const struct kernfs_ops *ops;
558
559
560
561
562
563
564
565
	int rc;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!sysfs_get_active(of->sd))
		goto out_unlock;

566
567
568
	ops = kernfs_ops(of->sd);
	if (ops->mmap)
		rc = ops->mmap(of, vma);
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	of->mmapped = 1;
	of->vm_ops = vma->vm_ops;
595
	vma->vm_ops = &kernfs_vm_ops;
596
597
598
599
600
601
602
603
out_put:
	sysfs_put_active(of->sd);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}

Tejun Heo's avatar
Tejun Heo committed
604
605
606
/**
 *	sysfs_get_open_dirent - get or create sysfs_open_dirent
 *	@sd: target sysfs_dirent
607
 *	@of: sysfs_open_file for this instance of open
Tejun Heo's avatar
Tejun Heo committed
608
609
 *
 *	If @sd->s_attr.open exists, increment its reference count;
610
 *	otherwise, create one.  @of is chained to the files list.
Tejun Heo's avatar
Tejun Heo committed
611
612
613
614
615
616
617
618
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
619
				 struct sysfs_open_file *of)
Tejun Heo's avatar
Tejun Heo committed
620
621
622
623
{
	struct sysfs_open_dirent *od, *new_od = NULL;

 retry:
Tejun Heo's avatar
Tejun Heo committed
624
	mutex_lock(&sysfs_open_file_mutex);
625
	spin_lock_irq(&sysfs_open_dirent_lock);
Tejun Heo's avatar
Tejun Heo committed
626
627
628
629
630
631
632
633
634

	if (!sd->s_attr.open && new_od) {
		sd->s_attr.open = new_od;
		new_od = NULL;
	}

	od = sd->s_attr.open;
	if (od) {
		atomic_inc(&od->refcnt);
635
		list_add_tail(&of->list, &od->files);
Tejun Heo's avatar
Tejun Heo committed
636
637
	}

638
	spin_unlock_irq(&sysfs_open_dirent_lock);
Tejun Heo's avatar
Tejun Heo committed
639
	mutex_unlock(&sysfs_open_file_mutex);
Tejun Heo's avatar
Tejun Heo committed
640
641
642
643
644
645
646
647
648
649
650
651

	if (od) {
		kfree(new_od);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
	if (!new_od)
		return -ENOMEM;

	atomic_set(&new_od->refcnt, 0);
652
653
	atomic_set(&new_od->event, 1);
	init_waitqueue_head(&new_od->poll);
654
	INIT_LIST_HEAD(&new_od->files);
Tejun Heo's avatar
Tejun Heo committed
655
656
657
658
659
660
	goto retry;
}

/**
 *	sysfs_put_open_dirent - put sysfs_open_dirent
 *	@sd: target sysfs_dirent
661
 *	@of: associated sysfs_open_file
Tejun Heo's avatar
Tejun Heo committed
662
 *
663
664
 *	Put @sd->s_attr.open and unlink @of from the files list.  If
 *	reference count reaches zero, disassociate and free it.
Tejun Heo's avatar
Tejun Heo committed
665
666
667
668
669
 *
 *	LOCKING:
 *	None.
 */
static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
670
				  struct sysfs_open_file *of)
Tejun Heo's avatar
Tejun Heo committed
671
672
{
	struct sysfs_open_dirent *od = sd->s_attr.open;
673
	unsigned long flags;
Tejun Heo's avatar
Tejun Heo committed
674

Tejun Heo's avatar
Tejun Heo committed
675
	mutex_lock(&sysfs_open_file_mutex);
676
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
Tejun Heo's avatar
Tejun Heo committed
677

678
679
680
	if (of)
		list_del(&of->list);

Tejun Heo's avatar
Tejun Heo committed
681
682
683
684
685
	if (atomic_dec_and_test(&od->refcnt))
		sd->s_attr.open = NULL;
	else
		od = NULL;

686
	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
Tejun Heo's avatar
Tejun Heo committed
687
	mutex_unlock(&sysfs_open_file_mutex);
Tejun Heo's avatar
Tejun Heo committed
688
689
690
691

	kfree(od);
}

692
static int kernfs_file_open(struct inode *inode, struct file *file)
Linus Torvalds's avatar
Linus Torvalds committed
693
{
694
	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
695
	const struct kernfs_ops *ops;
696
	struct sysfs_open_file *of;
697
	bool has_read, has_write, has_mmap;
698
	int error = -EACCES;
Linus Torvalds's avatar
Linus Torvalds committed
699

700
	if (!sysfs_get_active(attr_sd))
701
		return -ENODEV;
Linus Torvalds's avatar
Linus Torvalds committed
702

703
	ops = kernfs_ops(attr_sd);
Linus Torvalds's avatar
Linus Torvalds committed
704

705
706
707
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;
Linus Torvalds's avatar
Linus Torvalds committed
708

709
710
711
712
713
714
715
716
717
	/* check perms and supported operations */
	if ((file->f_mode & FMODE_WRITE) &&
	    (!(inode->i_mode & S_IWUGO) || !has_write))
		goto err_out;

	if ((file->f_mode & FMODE_READ) &&
	    (!(inode->i_mode & S_IRUGO) || !has_read))
		goto err_out;

718
	/* allocate a sysfs_open_file for the file */
719
	error = -ENOMEM;
720
721
	of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
	if (!of)
722
		goto err_out;
Linus Torvalds's avatar
Linus Torvalds committed
723

724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

741
742
	of->sd = attr_sd;
	of->file = file;
743
744

	/*
745
746
747
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
748
	 */
749
	if (ops->seq_show)
750
		error = seq_open(file, &kernfs_seq_ops);
751
752
	else
		error = seq_open(file, NULL);
753
754
755
	if (error)
		goto err_free;

756
757
	((struct seq_file *)file->private_data)->private = of;

758
759
760
	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;
761

Tejun Heo's avatar
Tejun Heo committed
762
	/* make sure we have open dirent struct */
763
	error = sysfs_get_open_dirent(attr_sd, of);
Tejun Heo's avatar
Tejun Heo committed
764
	if (error)
765
		goto err_close;
Tejun Heo's avatar
Tejun Heo committed
766

767
	/* open succeeded, put active references */
768
	sysfs_put_active(attr_sd);
769
770
	return 0;

771
err_close:
772
	seq_release(inode, file);
773
err_free:
774
	kfree(of);
775
err_out:
776
	sysfs_put_active(attr_sd);
Linus Torvalds's avatar
Linus Torvalds committed
777
778
779
	return error;
}

780
static int kernfs_file_release(struct inode *inode, struct file *filp)
Linus Torvalds's avatar
Linus Torvalds committed
781
{
Tejun Heo's avatar
Tejun Heo committed
782
	struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
783
	struct sysfs_open_file *of = sysfs_of(filp);
Linus Torvalds's avatar
Linus Torvalds committed
784

785
	sysfs_put_open_dirent(sd, of);
786
	seq_release(inode, filp);
787
	kfree(of);
788

Linus Torvalds's avatar
Linus Torvalds committed
789
790
791
	return 0;
}

792
793
794
795
796
void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	struct sysfs_open_file *of;

797
	if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
		return;

	spin_lock_irq(&sysfs_open_dirent_lock);
	od = sd->s_attr.open;
	if (od)
		atomic_inc(&od->refcnt);
	spin_unlock_irq(&sysfs_open_dirent_lock);
	if (!od)
		return;

	mutex_lock(&sysfs_open_file_mutex);
	list_for_each_entry(of, &od->files, list) {
		struct inode *inode = file_inode(of->file);
		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
	}
	mutex_unlock(&sysfs_open_file_mutex);

	sysfs_put_open_dirent(sd, NULL);
}

818
819
820
821
822
823
824
/* Sysfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return POLLERR|POLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
825
 * need to close and re-open the file, or seek to 0 and read again.
826
827
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
828
 * to see if it supports poll (Neither 'poll' nor 'select' return
829
830
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
831
static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
832
{
833
	struct sysfs_open_file *of = sysfs_of(filp);
834
	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
835
	struct sysfs_open_dirent *od = attr_sd->s_attr.open;
836
837

	/* need parent for the kobj, grab both */
838
	if (!sysfs_get_active(attr_sd))
839
		goto trigger;
840

841
	poll_wait(filp, &od->poll, wait);
842

843
	sysfs_put_active(attr_sd);
844

845
	if (of->event != atomic_read(&od->event))
846
		goto trigger;
847

848
	return DEFAULT_POLLMASK;
849
850

 trigger:
851
	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
852
853
}

854
855
856
void sysfs_notify_dirent(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
857
	unsigned long flags;
858

859
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
860

861
862
863
864
865
866
	if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
		od = sd->s_attr.open;
		if (od) {
			atomic_inc(&od->event);
			wake_up_interruptible(&od->poll);
		}
867
868
	}

869
	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
870
871
872
}
EXPORT_SYMBOL_GPL(sysfs_notify_dirent);

873
void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
874
{
875
	struct sysfs_dirent *sd = k->sd;
876

877
878
879
	mutex_lock(&sysfs_mutex);

	if (sd && dir)
Tejun Heo's avatar
Tejun Heo committed
880
		sd = sysfs_find_dirent(sd, dir, NULL);
881
	if (sd && attr)
Tejun Heo's avatar
Tejun Heo committed
882
		sd = sysfs_find_dirent(sd, attr, NULL);
883
884
	if (sd)
		sysfs_notify_dirent(sd);
885
886

	mutex_unlock(&sysfs_mutex);
887
888
889
}
EXPORT_SYMBOL_GPL(sysfs_notify);

890
const struct file_operations kernfs_file_operations = {
891
	.read		= kernfs_file_read,
892
	.write		= kernfs_file_write,
893
	.llseek		= generic_file_llseek,
894
	.mmap		= kernfs_file_mmap,
895
896
897
	.open		= kernfs_file_open,
	.release	= kernfs_file_release,
	.poll		= kernfs_file_poll,
898
899
};

900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
/*
 * kernfs_ops variants for sysfs attributes.  sysfs_add_file_mode_ns()
 * picks one according to which callbacks the attribute provides.
 */

/* attribute implementing no callback at all */
static const struct kernfs_ops sysfs_file_kfops_empty = {
};

/* regular attribute: show() only */
static const struct kernfs_ops sysfs_file_kfops_ro = {
	.seq_show = sysfs_kf_seq_show,
};

/* regular attribute: store() only */
static const struct kernfs_ops sysfs_file_kfops_wo = {
	.write = sysfs_kf_write,
};

/* regular attribute: show() and store() */
static const struct kernfs_ops sysfs_file_kfops_rw = {
	.seq_show = sysfs_kf_seq_show,
	.write = sysfs_kf_write,
};

/* bin attribute: read() only */
static const struct kernfs_ops sysfs_bin_kfops_ro = {
	.read = sysfs_kf_bin_read,
};

/* bin attribute: write() only */
static const struct kernfs_ops sysfs_bin_kfops_wo = {
	.write = sysfs_kf_bin_write,
};

/* bin attribute: read() and write(), or anything with mmap() */
static const struct kernfs_ops sysfs_bin_kfops_rw = {
	.read = sysfs_kf_bin_read,
	.write = sysfs_kf_bin_write,
	.mmap = sysfs_kf_bin_mmap,
};

930
int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
931
			   const struct attribute *attr, bool is_bin,
932
			   umode_t mode, const void *ns)
Linus Torvalds's avatar
Linus Torvalds committed
933
{
934
	const struct kernfs_ops *ops;
935
	struct sysfs_dirent *sd;
936
	loff_t size;
Linus Torvalds's avatar
Linus Torvalds committed
937

938
	if (!is_bin) {
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
		struct kobject *kobj = dir_sd->priv;
		const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;

		/* every kobject with an attribute needs a ktype assigned */
		if (WARN(!sysfs_ops, KERN_ERR
			 "missing sysfs attribute operations for kobject: %s\n",
			 kobject_name(kobj)))
			return -EINVAL;

		if (sysfs_ops->show && sysfs_ops->store)
			ops = &sysfs_file_kfops_rw;
		else if (sysfs_ops->show)
			ops = &sysfs_file_kfops_ro;
		else if (sysfs_ops->store)
			ops = &sysfs_file_kfops_wo;
		else
			ops = &sysfs_file_kfops_empty;
956
957

		size = PAGE_SIZE;
958
959
960
961
962
963
964
965
966
967
968
	} else {
		struct bin_attribute *battr = (void *)attr;

		if ((battr->read && battr->write) || battr->mmap)
			ops = &sysfs_bin_kfops_rw;
		else if (battr->read)
			ops = &sysfs_bin_kfops_ro;
		else if (battr->write)
			ops = &sysfs_bin_kfops_wo;
		else
			ops = &sysfs_file_kfops_empty;
969
970

		size = battr->size;
971
972
	}

973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
	sd = kernfs_create_file_ns(dir_sd, attr->name, mode, size,
				   ops, (void *)attr, ns);
	if (IS_ERR(sd)) {
		if (PTR_ERR(sd) == -EEXIST)
			sysfs_warn_dup(dir_sd, attr->name);
		return PTR_ERR(sd);
	}
	return 0;
}

/**
 * kernfs_create_file_ns - create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct sysfs_dirent *kernfs_create_file_ns(struct sysfs_dirent *parent,
					   const char *name,
					   umode_t mode, loff_t size,
					   const struct kernfs_ops *ops,
					   void *priv, const void *ns)
{
	struct sysfs_addrm_cxt acxt;
	struct sysfs_dirent *sd;
	int rc;

	sd = sysfs_new_dirent(name, (mode & S_IALLUGO) | S_IFREG,
			      SYSFS_KOBJ_ATTR);
	if (!sd)
		return ERR_PTR(-ENOMEM);

	sd->s_attr.ops = ops;
	sd->s_attr.size = size;
	sd->s_ns = ns;
	sd->priv = priv;
	sysfs_dirent_init_lockdep(sd);

	/*
	 * sd->s_attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
	if (ops->mmap)
		sd->s_flags |= SYSFS_FLAG_HAS_MMAP;

	sysfs_addrm_start(&acxt);
	rc = sysfs_add_one(&acxt, sd, parent);
	sysfs_addrm_finish(&acxt);

	if (rc) {
		/* adding failed, drop the new dirent */
		sysfs_put(sd);
		return ERR_PTR(rc);
	}
	return sd;
}

1037
int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
1038
		   bool is_bin)
1039
{
1040
	return sysfs_add_file_mode_ns(dir_sd, attr, is_bin, attr->mode, NULL);
1041
1042
}

Linus Torvalds's avatar
Linus Torvalds committed
1043
/**
1044
1045
1046
1047
 * sysfs_create_file_ns - create an attribute file for an object with custom ns
 * @kobj: object we're creating for
 * @attr: attribute descriptor
 * @ns: namespace the new file should belong to
Linus Torvalds's avatar
Linus Torvalds committed
1048
 */
1049
1050
int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
			 const void *ns)
Linus Torvalds's avatar
Linus Torvalds committed
1051
{
1052
	BUG_ON(!kobj || !kobj->sd || !attr);
Linus Torvalds's avatar
Linus Torvalds committed
1053

1054
	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
Linus Torvalds's avatar
Linus Torvalds committed
1055
1056

}
1057
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
Linus Torvalds's avatar
Linus Torvalds committed
1058

1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
/**
 * sysfs_create_files - create a NULL-terminated array of attribute files
 * @kobj: object the files are created for
 * @ptr: NULL-terminated array of attribute descriptors
 *
 * Creates the files in array order with sysfs_create_file().  On the first
 * failure, every file that this call managed to create is removed again and
 * the error is returned.  The attribute whose creation failed is not passed
 * to sysfs_remove_file(): it was never added by this call, and when the
 * failure is a name clash a by-name removal could take out a file that
 * belongs to someone else.
 *
 * Returns 0 on success (including an empty array), or the error returned by
 * the failing sysfs_create_file().
 */
int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
{
	int err = 0;
	int i;

	for (i = 0; ptr[i]; i++) {
		err = sysfs_create_file(kobj, ptr[i]);
		if (err)
			break;	/* i stays at the entry that failed */
	}

	/* unwind only entries [0, i), i.e. the ones created above */
	if (err)
		while (--i >= 0)
			sysfs_remove_file(kobj, ptr[i]);

	return err;
}
EXPORT_SYMBOL_GPL(sysfs_create_files);
Linus Torvalds's avatar
Linus Torvalds committed
1072

1073
1074
1075
1076
1077
1078
1079
1080
1081
/**
 * sysfs_add_file_to_group - add an attribute file to a pre-existing group.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @group: group name.
 */
int sysfs_add_file_to_group(struct kobject *kobj,
		const struct attribute *attr, const char *group)
{
1082
	struct sysfs_dirent *dir_sd;
1083
1084
	int error;

1085
	if (group)
Tejun Heo's avatar
Tejun Heo committed
1086
		dir_sd = sysfs_get_dirent(kobj->sd, group);
1087
1088
1089
	else
		dir_sd = sysfs_get(kobj->sd);

1090
1091
1092
	if (!dir_sd)
		return -ENOENT;

1093
	error = sysfs_add_file(dir_sd, attr, false);
1094
1095
	sysfs_put(dir_sd);

1096
1097
1098
1099
	return error;
}
EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);

1100
1101
1102
1103
1104
1105
1106
/**
 * sysfs_chmod_file - update the modified mode value on an object attribute.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @mode: file permissions.
 *
 */
1107
int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
Al Viro's avatar
Al Viro committed
1108
		     umode_t mode)
1109
{
1110
	struct sysfs_dirent *sd;
1111
	struct iattr newattrs;
1112
1113
	int rc;

1114
	sd = sysfs_get_dirent(kobj->sd, attr->name);
1115
	if (!sd)
1116
		return -ENOENT;
1117

1118
	newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
1119
	newattrs.ia_valid = ATTR_MODE;
1120

1121
1122
1123
	rc = kernfs_setattr(sd, &newattrs);

	sysfs_put(sd);
1124
	return rc;
1125
1126
1127
}
EXPORT_SYMBOL_GPL(sysfs_chmod_file);

Linus Torvalds's avatar
Linus Torvalds committed
1128
/**
1129
1130
1131
1132
 * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
 * @kobj: object we're acting for
 * @attr: attribute descriptor
 * @ns: namespace tag of the file to remove
Linus Torvalds's avatar
Linus Torvalds committed
1133
 *
1134
 * Hash the attribute name and namespace tag and kill the victim.
Linus Torvalds's avatar
Linus Torvalds committed
1135
 */
1136
1137
void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
			  const void *ns)
Linus Torvalds's avatar
Linus Torvalds committed
1138
{
1139
	struct sysfs_dirent *dir_sd = kobj->sd;
1140

1141
	kernfs_remove_by_name_ns(dir_sd, attr->name, ns);
Linus Torvalds's avatar
Linus Torvalds committed
1142
}
1143
EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
Linus Torvalds's avatar
Linus Torvalds committed
1144

1145
/*
 * Call sysfs_remove_file() on each entry of the NULL-terminated attribute
 * array @ptr, in array order, for @kobj.
 */
void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
{
	const struct attribute **cur;

	for (cur = ptr; *cur; cur++)
		sysfs_remove_file(kobj, *cur);
}
EXPORT_SYMBOL_GPL(sysfs_remove_files);
Linus Torvalds's avatar
Linus Torvalds committed
1152

1153
1154
1155
1156
1157
1158
1159
1160
1161
/**
 * sysfs_remove_file_from_group - remove an attribute file from a group.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @group: group name.
 */
void sysfs_remove_file_from_group(struct kobject *kobj,
		const struct attribute *attr, const char *group)
{
1162
	struct sysfs_dirent *dir_sd;
1163

1164
	if (group)
Tejun Heo's avatar
Tejun Heo committed
1165
		dir_sd = sysfs_get_dirent(kobj->sd, group);
1166
1167
	else
		dir_sd = sysfs_get(kobj->sd);
1168
	if (dir_sd) {
1169
		kernfs_remove_by_name(dir_sd, attr->name);
1170
		sysfs_put(dir_sd);
1171
1172
1173
1174
	}
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);

1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
/**
 *	sysfs_create_bin_file - create binary file for object.
 *	@kobj:	object.
 *	@attr:	attribute descriptor.
 */
int sysfs_create_bin_file(struct kobject *kobj,
			  const struct bin_attribute *attr)
{
	BUG_ON(!kobj || !kobj->sd || !attr);

1185
	return sysfs_add_file(kobj->sd, &attr->attr, true);
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);

/**
 *	sysfs_remove_bin_file - remove binary file for object.
 *	@kobj:	object.
 *	@attr:	attribute descriptor.
 */
void sysfs_remove_bin_file(struct kobject *kobj,
			   const struct bin_attribute *attr)
{
1197
	kernfs_remove_by_name(kobj->sd, attr->attr.name);
1198
1199
1200
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);

1201
struct sysfs_schedule_callback_struct {
1202
1203
	struct list_head	workq_list;
	struct kobject		*kobj;
1204
1205
	void			(*func)(void *);
	void			*data;
1206