coredump.c 21.2 KB
Newer Older
1
2
3
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
4
#include <linux/freezer.h>
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
18
#include <linux/coredump.h>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
36
37
38
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/path.h>
39
#include <linux/timekeeping.h>
40

41
#include <linux/uaccess.h>
42
43
44
45
46
47
48
49
50
51
52
#include <asm/mmu_context.h>
#include <asm/tlb.h>
#include <asm/exec.h>

#include <trace/events/task.h>
#include "internal.h"

#include <trace/events/sched.h>

/*
 * When non-zero and core_pattern contains no %p, format_corename()
 * appends ".<tgid>" to the file name (never for piped patterns).
 */
int core_uses_pid;
/*
 * Upper bound on concurrently running pipe-helper dumps; 0 disables the
 * bound (and the wait for the helper to finish) in do_coredump().
 */
unsigned int core_pipe_limit;
/* Name template for core files; a leading '|' selects a pipe helper. */
char core_pattern[CORENAME_MAX_SIZE] = "core";
/*
 * Initial allocation size used for the expanded name; expand_corename()
 * raises it whenever a larger buffer was needed.
 */
static int core_name_size = CORENAME_MAX_SIZE;
55
56
57
58
59
60
61
62

/* Growable buffer in which the expanded core file name is assembled. */
struct core_name {
	char *corename;	/* heap buffer holding the name built so far */
	int used;	/* bytes already written, not counting the NUL */
	int size;	/* usable capacity of corename in bytes */
};

/* The maximal length of core_pattern is also specified in sysctl.c */

63
static int expand_corename(struct core_name *cn, int size)
64
{
65
	char *corename = krealloc(cn->corename, size, GFP_KERNEL);
66

67
	if (!corename)
68
69
		return -ENOMEM;

70
71
72
73
	if (size > core_name_size) /* racy but harmless */
		core_name_size = size;

	cn->size = ksize(corename);
74
	cn->corename = corename;
75
76
77
	return 0;
}

78
79
/*
 * Append printf-style formatted text to cn->corename, enlarging the
 * buffer as needed.
 *
 * Returns 0 on success (cn->used is advanced past the new text) or
 * -ENOMEM if the buffer could not be enlarged.
 */
static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
				     va_list arg)
{
	int free, need;
	va_list arg_copy;

again:
	free = cn->size - cn->used;

	/* Format from a copy: @arg must stay usable if we have to retry. */
	va_copy(arg_copy, arg);
	need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
	va_end(arg_copy);

	/* Strictly less: the terminating NUL needs a byte as well. */
	if (need < free) {
		cn->used += need;
		return 0;
	}

	/* Did not fit: grow by the shortfall plus the NUL and try again. */
	if (!expand_corename(cn, cn->size + need - free + 1))
		goto again;

	return -ENOMEM;
}

102
/*
 * Variadic front end for cn_vprintf(): append formatted text to the core
 * name.  Returns 0 on success or -ENOMEM.
 */
static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
{
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	return ret;
}

114
115
/*
 * Like cn_printf(), but sanitises the text it appends so that it is safe
 * to use as a single path component:
 *   - a component that is exactly "." or ".." gets its first character
 *     replaced by '!';
 *   - an empty component becomes "!";
 *   - every '/' in the component is replaced by '!'.
 *
 * Returns 0 on success or -ENOMEM.
 */
static __printf(2, 3)
int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
{
	int cur = cn->used;	/* offset where this component starts */
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	if (ret == 0) {
		/*
		 * Ensure that this coredump name component can't cause the
		 * resulting corefile path to consist of a ".." or ".".
		 */
		if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
				(cn->used - cur == 2 && cn->corename[cur] == '.'
				&& cn->corename[cur+1] == '.'))
			cn->corename[cur] = '!';

		/*
		 * Empty names are fishy and could be used to create a "//" in a
		 * corefile name, causing the coredump to happen one directory
		 * level too high. Enforce that all components of the core
		 * pattern are at least one character long.
		 */
		if (cn->used == cur)
			ret = cn_printf(cn, "!");
	}

	/* Neutralise path separators in whatever was appended. */
	for (; cur < cn->used; ++cur) {
		if (cn->corename[cur] == '/')
			cn->corename[cur] = '!';
	}
	return ret;
}

static int cn_print_exe_file(struct core_name *cn)
{
	struct file *exe_file;
	char *pathbuf, *path;
	int ret;

	exe_file = get_mm_exe_file(current->mm);
159
160
	if (!exe_file)
		return cn_esc_printf(cn, "%s (path unknown)", current->comm);
161
162
163
164
165
166
167

	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
	if (!pathbuf) {
		ret = -ENOMEM;
		goto put_exe_file;
	}

Miklos Szeredi's avatar
Miklos Szeredi committed
168
	path = file_path(exe_file, pathbuf, PATH_MAX);
169
170
171
172
173
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		goto free_buf;
	}

174
	ret = cn_esc_printf(cn, "%s", path);
175
176
177
178
179
180
181
182
183
184
185
186

free_buf:
	kfree(pathbuf);
put_exe_file:
	fput(exe_file);
	return ret;
}

/* format_corename will inspect the pattern parameter, and output a
 * name into corename, which must have space for at least
 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 */
187
static int format_corename(struct core_name *cn, struct coredump_params *cprm)
188
189
190
191
192
193
194
{
	const struct cred *cred = current_cred();
	const char *pat_ptr = core_pattern;
	int ispipe = (*pat_ptr == '|');
	int pid_in_pattern = 0;
	int err = 0;

195
	cn->used = 0;
196
197
	cn->corename = NULL;
	if (expand_corename(cn, core_name_size))
198
		return -ENOMEM;
199
200
201
202
	cn->corename[0] = '\0';

	if (ispipe)
		++pat_ptr;
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

	/* Repeat as long as we have more pattern to process and more output
	   space */
	while (*pat_ptr) {
		if (*pat_ptr != '%') {
			err = cn_printf(cn, "%c", *pat_ptr++);
		} else {
			switch (*++pat_ptr) {
			/* single % at the end, drop that */
			case 0:
				goto out;
			/* Double percent, output one percent */
			case '%':
				err = cn_printf(cn, "%c", '%');
				break;
			/* pid */
			case 'p':
				pid_in_pattern = 1;
				err = cn_printf(cn, "%d",
					      task_tgid_vnr(current));
				break;
224
225
226
227
228
			/* global pid */
			case 'P':
				err = cn_printf(cn, "%d",
					      task_tgid_nr(current));
				break;
229
230
231
232
233
234
235
236
			case 'i':
				err = cn_printf(cn, "%d",
					      task_pid_vnr(current));
				break;
			case 'I':
				err = cn_printf(cn, "%d",
					      task_pid_nr(current));
				break;
237
238
			/* uid */
			case 'u':
239
240
241
				err = cn_printf(cn, "%u",
						from_kuid(&init_user_ns,
							  cred->uid));
242
243
244
				break;
			/* gid */
			case 'g':
245
246
247
				err = cn_printf(cn, "%u",
						from_kgid(&init_user_ns,
							  cred->gid));
248
				break;
249
250
251
252
			case 'd':
				err = cn_printf(cn, "%d",
					__get_dumpable(cprm->mm_flags));
				break;
253
254
			/* signal that caused the coredump */
			case 's':
255
256
				err = cn_printf(cn, "%d",
						cprm->siginfo->si_signo);
257
258
259
				break;
			/* UNIX time of coredump */
			case 't': {
260
261
262
263
				time64_t time;

				time = ktime_get_real_seconds();
				err = cn_printf(cn, "%lld", time);
264
265
266
				break;
			}
			/* hostname */
267
			case 'h':
268
				down_read(&uts_sem);
269
				err = cn_esc_printf(cn, "%s",
270
271
272
273
					      utsname()->nodename);
				up_read(&uts_sem);
				break;
			/* executable */
274
275
			case 'e':
				err = cn_esc_printf(cn, "%s", current->comm);
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
				break;
			case 'E':
				err = cn_print_exe_file(cn);
				break;
			/* core limit size */
			case 'c':
				err = cn_printf(cn, "%lu",
					      rlimit(RLIMIT_CORE));
				break;
			default:
				break;
			}
			++pat_ptr;
		}

		if (err)
			return err;
	}

295
out:
296
297
298
299
300
301
302
303
304
305
306
307
308
	/* Backward compatibility with core_uses_pid:
	 *
	 * If core_pattern does not include a %p (as is the default)
	 * and core_uses_pid is set, then .%pid will be appended to
	 * the filename. Do not do this for piped commands. */
	if (!ispipe && !pid_in_pattern && core_uses_pid) {
		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
		if (err)
			return err;
	}
	return ispipe;
}

309
static int zap_process(struct task_struct *start, int exit_code, int flags)
310
311
312
313
{
	struct task_struct *t;
	int nr = 0;

314
315
	/* ignore all signals except SIGKILL, see prepare_signal() */
	start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
316
317
318
	start->signal->group_exit_code = exit_code;
	start->signal->group_stop_count = 0;

319
	for_each_thread(start, t) {
320
321
322
323
324
325
		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
		if (t != current && t->mm) {
			sigaddset(&t->pending.signal, SIGKILL);
			signal_wake_up(t, 1);
			nr++;
		}
326
	}
327
328
329
330

	return nr;
}

331
332
static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
			struct core_state *core_state, int exit_code)
333
334
335
336
337
338
339
340
{
	struct task_struct *g, *p;
	unsigned long flags;
	int nr = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!signal_group_exit(tsk->signal)) {
		mm->core_state = core_state;
341
		tsk->signal->group_exit_task = tsk;
342
		nr = zap_process(tsk, exit_code, 0);
343
		clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
344
345
346
347
348
	}
	spin_unlock_irq(&tsk->sighand->siglock);
	if (unlikely(nr < 0))
		return nr;

349
	tsk->flags |= PF_DUMPCORE;
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
	if (atomic_read(&mm->mm_users) == nr + 1)
		goto done;
	/*
	 * We should find and kill all tasks which use this mm, and we should
	 * count them correctly into ->nr_threads. We don't take tasklist
	 * lock, but this is safe wrt:
	 *
	 * fork:
	 *	None of sub-threads can fork after zap_process(leader). All
	 *	processes which were created before this point should be
	 *	visible to zap_threads() because copy_process() adds the new
	 *	process to the tail of init_task.tasks list, and lock/unlock
	 *	of ->siglock provides a memory barrier.
	 *
	 * do_exit:
	 *	The caller holds mm->mmap_sem. This means that the task which
	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
	 *	its ->mm.
	 *
	 * de_thread:
	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
	 *	we must see either old or new leader, this does not matter.
	 *	However, it can change p->sighand, so lock_task_sighand(p)
	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
	 *	it can't fail.
	 *
	 *	Note also that "g" can be the old leader with ->mm == NULL
	 *	and already unhashed and thus removed from ->thread_group.
	 *	This is OK, __unhash_process()->list_del_rcu() does not
	 *	clear the ->next pointer, we will find the new leader via
	 *	next_thread().
	 */
	rcu_read_lock();
	for_each_process(g) {
		if (g == tsk->group_leader)
			continue;
		if (g->flags & PF_KTHREAD)
			continue;
388
389
390
391
392
393
394
395
396

		for_each_thread(g, p) {
			if (unlikely(!p->mm))
				continue;
			if (unlikely(p->mm == mm)) {
				lock_task_sighand(p, &flags);
				nr += zap_process(p, exit_code,
							SIGNAL_GROUP_EXIT);
				unlock_task_sighand(p, &flags);
397
			}
398
399
			break;
		}
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
	}
	rcu_read_unlock();
done:
	atomic_set(&core_state->nr_threads, nr);
	return nr;
}

static int coredump_wait(int exit_code, struct core_state *core_state)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int core_waiters = -EBUSY;

	init_completion(&core_state->startup);
	core_state->dumper.task = tsk;
	core_state->dumper.next = NULL;

417
418
419
	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

420
421
422
423
424
425
426
	if (!mm->core_state)
		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
	up_write(&mm->mmap_sem);

	if (core_waiters > 0) {
		struct core_thread *ptr;

427
		freezer_do_not_count();
428
		wait_for_completion(&core_state->startup);
429
		freezer_count();
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
		/*
		 * Wait for all the threads to become inactive, so that
		 * all the thread context (extended register state, like
		 * fpu etc) gets copied to the memory.
		 */
		ptr = core_state->dumper.next;
		while (ptr != NULL) {
			wait_task_inactive(ptr->task, 0);
			ptr = ptr->next;
		}
	}

	return core_waiters;
}

445
/*
 * Undo coredump_wait(): finalise the group's exit state, release every
 * thread queued on mm->core_state and clear mm->core_state.
 *
 * @core_dumped: true if the dump was written; unless a fatal signal is
 * pending, 0x80 is then OR-ed into the group exit code.
 */
static void coredump_finish(struct mm_struct *mm, bool core_dumped)
{
	struct core_thread *curr, *next;
	struct task_struct *task;

	spin_lock_irq(&current->sighand->siglock);
	if (core_dumped && !__fatal_signal_pending(current))
		current->signal->group_exit_code |= 0x80;
	current->signal->group_exit_task = NULL;
	current->signal->flags = SIGNAL_GROUP_EXIT;
	spin_unlock_irq(&current->sighand->siglock);

	next = mm->core_state->dumper.next;
	while ((curr = next) != NULL) {
		/* Read ->next and ->task before the entry is released. */
		next = curr->next;
		task = curr->task;
		/*
		 * see exit_mm(), curr->task must not see
		 * ->task == NULL before we read ->next.
		 */
		smp_mb();
		curr->task = NULL;
		wake_up_process(task);
	}

	mm->core_state = NULL;
}

473
474
475
476
477
478
479
480
481
482
483
/* Report whether the dump in progress should be abandoned. */
static bool dump_interrupted(void)
{
	/*
	 * Both SIGKILL and freezing() show up as a pending signal and
	 * abort the dump.  Using try_to_freeze() together with
	 * __fatal_signal_pending() might be nicer, but dump_write() would
	 * then have to learn to restart and to clear TIF_SIGPENDING.
	 */
	bool pending = signal_pending(current);

	return pending;
}

484
485
static void wait_for_dump_helpers(struct file *file)
{
486
	struct pipe_inode_info *pipe = file->private_data;
487
488
489
490

	pipe_lock(pipe);
	pipe->readers++;
	pipe->writers--;
491
492
493
	wake_up_interruptible_sync(&pipe->wait);
	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	pipe_unlock(pipe);
494

495
496
497
498
499
	/*
	 * We actually want wait_event_freezable() but then we need
	 * to clear TIF_SIGPENDING and improve dump_interrupted().
	 */
	wait_event_interruptible(pipe->wait, pipe->readers == 1);
500

501
	pipe_lock(pipe);
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
	pipe->readers--;
	pipe->writers++;
	pipe_unlock(pipe);
}

/*
 * umh_pipe_setup
 * helper function to customize the process used
 * to collect the core in userspace.  Specifically
 * it sets up a pipe and installs it as fd 0 (stdin)
 * for the process.  Returns 0 on success, or
 * PTR_ERR on failure.
 * Note that it also sets the core limit to 1.  This
 * is a special value that we use to trap recursive
 * core dumps
 */
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
	struct file *files[2];
	struct coredump_params *cp = (struct coredump_params *)info->data;
	int err = create_pipe_files(files, 0);
	if (err)
		return err;

	cp->file = files[1];

Al Viro's avatar
Al Viro committed
528
529
	err = replace_fd(0, files[0], 0);
	fput(files[0]);
530
531
532
	/* and disallow core files too */
	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};

Al Viro's avatar
Al Viro committed
533
	return err;
534
535
}

Al Viro's avatar
Al Viro committed
536
void do_coredump(const siginfo_t *siginfo)
537
538
539
540
541
542
543
544
545
546
{
	struct core_state core_state;
	struct core_name cn;
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int ispipe;
	struct files_struct *displaced;
547
548
	/* require nonrelative corefile path and be extra careful */
	bool need_suid_safe = false;
549
	bool core_dumped = false;
550
551
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
552
		.siginfo = siginfo,
553
		.regs = signal_pt_regs(),
554
555
556
557
558
559
560
561
562
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
	};

563
	audit_core_dumps(siginfo->si_signo);
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

	binfmt = mm->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;

	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
580
	if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
581
582
		/* Setuid core dump mode */
		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
583
		need_suid_safe = true;
584
585
	}

586
	retval = coredump_wait(siginfo->si_signo, &core_state);
587
588
589
590
591
	if (retval < 0)
		goto fail_creds;

	old_cred = override_creds(cred);

592
	ispipe = format_corename(&cn, &cprm);
593

594
	if (ispipe) {
595
596
		int dump_count;
		char **helper_argv;
597
		struct subprocess_info *sub_info;
598
599
600
601

		if (ispipe < 0) {
			printk(KERN_WARNING "format_corename failed\n");
			printk(KERN_WARNING "Aborting core\n");
602
			goto fail_unlock;
603
604
605
606
607
608
609
		}

		if (cprm.limit == 1) {
			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
			 *
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
Bastien Nocera's avatar
Bastien Nocera committed
610
			 * cprm.limit of 1 here as a special value, this is a
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
			 * consistent way to catch recursive crashes.
			 * We can still crash if the core_pattern binary sets
			 * RLIM_CORE = !1, but it runs as root, and can do
			 * lots of stupid things.
			 *
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader.  That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			printk(KERN_WARNING
				"Process %d(%s) has RLIMIT_CORE set to 1\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;

		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
			       task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_dropcount;
		}

637
		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
638
639
640
641
642
643
		if (!helper_argv) {
			printk(KERN_WARNING "%s failed to allocate memory\n",
			       __func__);
			goto fail_dropcount;
		}

644
645
646
647
648
649
650
651
		retval = -ENOMEM;
		sub_info = call_usermodehelper_setup(helper_argv[0],
						helper_argv, NULL, GFP_KERNEL,
						umh_pipe_setup, NULL, &cprm);
		if (sub_info)
			retval = call_usermodehelper_exec(sub_info,
							  UMH_WAIT_EXEC);

652
653
		argv_free(helper_argv);
		if (retval) {
654
			printk(KERN_INFO "Core dump to |%s pipe failed\n",
655
656
			       cn.corename);
			goto close_fail;
657
		}
658
659
	} else {
		struct inode *inode;
660
661
		int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
				 O_LARGEFILE | O_EXCL;
662
663
664
665

		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;

666
		if (need_suid_safe && cn.corename[0] != '/') {
667
668
669
670
671
672
673
			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
				"to fully qualified path!\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_unlock;
		}

674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
		/*
		 * Unlink the file if it exists unless this is a SUID
		 * binary - in that case, we're running around with root
		 * privs and don't want to unlink another user's coredump.
		 */
		if (!need_suid_safe) {
			mm_segment_t old_fs;

			old_fs = get_fs();
			set_fs(KERNEL_DS);
			/*
			 * If it doesn't exist, that's fine. If there's some
			 * other problem, we'll catch it at the filp_open().
			 */
			(void) sys_unlink((const char __user *)cn.corename);
			set_fs(old_fs);
		}

		/*
		 * There is a race between unlinking and creating the
		 * file, but if that causes an EEXIST here, that's
		 * fine - another process raced with us while creating
		 * the corefile, and the other process won. To userspace,
		 * what matters is that at least one of the two processes
		 * writes its coredump successfully, not which one.
		 */
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
		if (need_suid_safe) {
			/*
			 * Using user namespaces, normal user tasks can change
			 * their current->fs->root to point to arbitrary
			 * directories. Since the intention of the "only dump
			 * with a fully qualified path" rule is to control where
			 * coredumps may be placed using root privileges,
			 * current->fs->root must not be used. Instead, use the
			 * root directory of init_task.
			 */
			struct path root;

			task_lock(&init_task);
			get_fs_root(init_task.fs, &root);
			task_unlock(&init_task);
			cprm.file = file_open_root(root.dentry, root.mnt,
				cn.corename, open_flags, 0600);
			path_put(&root);
		} else {
			cprm.file = filp_open(cn.corename, open_flags, 0600);
		}
721
722
723
		if (IS_ERR(cprm.file))
			goto fail_unlock;

Al Viro's avatar
Al Viro committed
724
		inode = file_inode(cprm.file);
725
726
727
728
729
730
731
732
733
734
735
		if (inode->i_nlink > 1)
			goto close_fail;
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
736
737
738
739
		 * Don't dump core if the filesystem changed owner or mode
		 * of the file during file creation. This is an issue when
		 * a process dumps core while its cwd is e.g. on a vfat
		 * filesystem.
740
741
742
		 */
		if (!uid_eq(inode->i_uid, current_fsuid()))
			goto close_fail;
743
744
		if ((inode->i_mode & 0677) != 0600)
			goto close_fail;
Al Viro's avatar
Al Viro committed
745
		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
746
747
748
749
750
751
752
753
754
755
756
			goto close_fail;
		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
			goto close_fail;
	}

	/* get us an unshared descriptor table; almost always a no-op */
	retval = unshare_files(&displaced);
	if (retval)
		goto close_fail;
	if (displaced)
		put_files_struct(displaced);
757
758
759
760
761
	if (!dump_interrupted()) {
		file_start_write(cprm.file);
		core_dumped = binfmt->core_dump(&cprm);
		file_end_write(cprm.file);
	}
762
763
764
765
766
767
768
769
770
771
	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	kfree(cn.corename);
772
	coredump_finish(mm, core_dumped);
773
774
775
776
777
778
779
780
781
782
783
784
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}

/*
 * Core dumping helper functions.  These are the only things you should
 * do on a core-file: use only these functions to write out all the
 * necessary info.
 */
Al Viro's avatar
Al Viro committed
785
786
787
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
	struct file *file = cprm->file;
788
789
	loff_t pos = file->f_pos;
	ssize_t n;
790
	if (cprm->written + nr > cprm->limit)
Al Viro's avatar
Al Viro committed
791
		return 0;
792
793
794
	while (nr) {
		if (dump_interrupted())
			return 0;
795
		n = __kernel_write(file, addr, nr, &pos);
796
797
798
		if (n <= 0)
			return 0;
		file->f_pos = pos;
799
		cprm->written += n;
800
		cprm->pos += n;
801
802
		nr -= n;
	}
Al Viro's avatar
Al Viro committed
803
804
805
806
	return 1;
}
EXPORT_SYMBOL(dump_emit);

807
/*
 * dump_skip - advance the core file position by @nr bytes.
 *
 * Seeks forward when the file supports llseek; otherwise emits @nr zero
 * bytes through dump_emit(), at most a page at a time.
 * Returns 1 on success, 0 on failure or interruption.
 */
int dump_skip(struct coredump_params *cprm, size_t nr)
{
	static char zeroes[PAGE_SIZE];
	struct file *file = cprm->file;
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (dump_interrupted() ||
		    file->f_op->llseek(file, nr, SEEK_CUR) < 0)
			return 0;
		cprm->pos += nr;
		return 1;
	} else {
		while (nr > PAGE_SIZE) {
			if (!dump_emit(cprm, zeroes, PAGE_SIZE))
				return 0;
			nr -= PAGE_SIZE;
		}
		/* the remainder is at most one page */
		return dump_emit(cprm, zeroes, nr);
	}
}
EXPORT_SYMBOL(dump_skip);
Al Viro's avatar
Al Viro committed
827
828
829

int dump_align(struct coredump_params *cprm, int align)
{
830
	unsigned mod = cprm->pos & (align - 1);
Al Viro's avatar
Al Viro committed
831
	if (align & (align - 1))
Al Viro's avatar
Al Viro committed
832
833
		return 0;
	return mod ? dump_skip(cprm, align - mod) : 1;
Al Viro's avatar
Al Viro committed
834
835
}
EXPORT_SYMBOL(dump_align);
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853

/*
 * Make sure the file is at least as long as its current position.
 * Without this, gdb reports a truncated core file whenever the last
 * "write" performed on it was a dump_skip().
 */
void dump_truncate(struct coredump_params *cprm)
{
	struct file *file = cprm->file;
	loff_t offset;

	/* Files that cannot seek have no position to extend to. */
	if (!file->f_op->llseek || file->f_op->llseek == no_llseek)
		return;

	offset = file->f_op->llseek(file, 0, SEEK_CUR);
	if (offset > i_size_read(file->f_mapping->host))
		do_truncate(file->f_path.dentry, offset, 0, file);
}
EXPORT_SYMBOL(dump_truncate);