fault.c 14.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 *  arch/s390/mm/fault.c
 *
 *  S390 version
 *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
23
#include <linux/kdebug.h>
Linus Torvalds's avatar
Linus Torvalds committed
24
25
26
27
28
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/hardirq.h>
Michael Grundy's avatar
Michael Grundy committed
29
#include <linux/kprobes.h>
30
#include <linux/uaccess.h>
Linus Torvalds's avatar
Linus Torvalds committed
31
32
33

#include <asm/system.h>
#include <asm/pgtable.h>
Heiko Carstens's avatar
Heiko Carstens committed
34
#include <asm/s390_ext.h>
Linus Torvalds's avatar
Linus Torvalds committed
35

36
/*
 * Architecture-width dependent constants used by the fault handlers below:
 *   __FAIL_ADDR_MASK  masks S390_lowcore.trans_exc_code down to the
 *                     failing address
 *   __FIXUP_MASK      masks the PSW address before the exception table
 *                     lookup
 *   __SUBCODE_MASK    value the high byte of the pfault external
 *                     interrupt subcode is compared against
 *   __PF_RES_FIELD    last initializer (reserved field) of the pfault
 *                     parameter block
 * Macro bodies that are expressions are parenthesized so they expand
 * safely inside larger expressions.
 */
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
#define __FIXUP_MASK 0x7fffffff
#define __SUBCODE_MASK 0x0200
#define __PF_RES_FIELD 0ULL
#else /* CONFIG_64BIT */
#define __FAIL_ADDR_MASK (-4096L)
#define __FIXUP_MASK (~0L)
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
#endif /* CONFIG_64BIT */
Linus Torvalds's avatar
Linus Torvalds committed
47
48
49
50
51
52
53

/*
 * Switch tested by do_sigsegv(): when non-zero, user process faults are
 * logged (interruption code, failing address, registers).
 */
#ifdef CONFIG_SYSCTL
extern int sysctl_userprocess_debug;
#endif

/*
 * Defined outside this file.  Called here with a message, the register
 * set and the error code right before do_exit() on fatal faults.
 */
extern void die(const char *,struct pt_regs *,long);

Michael Grundy's avatar
Michael Grundy committed
54
#ifdef CONFIG_KPROBES
/*
 * Offer a fault to kprobes before normal fault handling starts.
 *
 * @regs: register set at the time of the fault
 * @err:  error code of the fault (not used by this implementation)
 *
 * Only faults that were not taken in user mode are considered.  Returns 1
 * when a kprobe is running and kprobe_fault_handler() returned non-zero,
 * in which case the caller skips its own handling; returns 0 otherwise.
 */
static inline int notify_page_fault(struct pt_regs *regs, long err)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		/* NOTE(review): trap number 14 is hard-coded - confirm meaning */
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
/* Without CONFIG_KPROBES nothing intercepts the fault: always return 0. */
static inline int notify_page_fault(struct pt_regs *regs, long err)
{
	return 0;
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
76
77
78

/*
 * Unlock any spinlocks which will prevent us from getting the
 * message out.
 */
void bust_spinlocks(int yes)
{
	if (yes) {
		oops_in_progress = 1;
	} else {
		int loglevel_save = console_loglevel;
		console_unblank();
		oops_in_progress = 0;
		/*
		 * OK, the message is on the console.  Now we call printk()
		 * without oops_in_progress set so that printk will give klogd
		 * a poke.  Hold onto your hats...
		 */
		console_loglevel = 15;
		printk(" ");
		console_loglevel = loglevel_save;
	}
}

/*
101
102
103
 * Returns the address space associated with the fault.
 * Returns 0 for kernel space, 1 for user space and
 * 2 for code execution in user space with noexec=on.
Linus Torvalds's avatar
Linus Torvalds committed
104
 */
105
static inline int check_space(struct task_struct *tsk)
Linus Torvalds's avatar
Linus Torvalds committed
106
107
{
	/*
108
109
	 * The lowest two bits of S390_lowcore.trans_exc_code
	 * indicate which paging table was used.
Linus Torvalds's avatar
Linus Torvalds committed
110
	 */
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
	int desc = S390_lowcore.trans_exc_code & 3;

	if (desc == 3)	/* Home Segment Table Descriptor */
		return switch_amode == 0;
	if (desc == 2)	/* Secondary Segment Table Descriptor */
		return tsk->thread.mm_segment.ar4;
#ifdef CONFIG_S390_SWITCH_AMODE
	if (unlikely(desc == 1)) { /* STD determined via access register */
		/* %a0 always indicates primary space. */
		if (S390_lowcore.exc_access_id != 0) {
			save_access_regs(tsk->thread.acrs);
			/*
			 * An alet of 0 indicates primary space.
			 * An alet of 1 indicates secondary space.
			 * Any other alet values generate an
			 * alen-translation exception.
			 */
			if (tsk->thread.acrs[S390_lowcore.exc_access_id])
				return tsk->thread.mm_segment.ar4;
		}
	}
#endif
	/* Primary Segment Table Descriptor */
	return switch_amode << s390_noexec;
Linus Torvalds's avatar
Linus Torvalds committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
}

/*
 * Send SIGSEGV to task.  This is an external routine
 * to keep the stack usage of do_page_fault small.
 */
static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
		       int si_code, unsigned long address)
{
	struct siginfo si;

#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
#if defined(CONFIG_SYSCTL)
	if (sysctl_userprocess_debug)
#endif
	{
		printk("User process fault: interruption code 0x%lX\n",
		       error_code);
		printk("failing address: %lX\n", address);
		show_regs(regs);
	}
#endif
	si.si_signo = SIGSEGV;
	si.si_code = si_code;
Heiko Carstens's avatar
Heiko Carstens committed
159
	si.si_addr = (void __user *) address;
Linus Torvalds's avatar
Linus Torvalds committed
160
161
162
	force_sig_info(SIGSEGV, &si, current);
}

163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*
 * Handle a fault that cannot be resolved through the normal user-space
 * path.  If the faulting PSW address has an exception table entry, resume
 * at its fixup address; otherwise report the bad access and kill the
 * current context via die() and do_exit().
 */
static void do_no_context(struct pt_regs *regs, unsigned long error_code,
			  unsigned long address)
{
	const struct exception_table_entry *entry;
	void *fault_addr = (void *)address;

	/* A registered fixup means the kernel expected this fault. */
	entry = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
	if (entry) {
		regs->psw.addr = entry->fixup | PSW_ADDR_AMODE;
		return;
	}

	/*
	 * No fixup: the kernel touched a bad page.  Say which address
	 * space the access went to, then terminate.
	 */
	if (check_space(current) != 0)
		printk(KERN_ALERT "Unable to handle kernel paging request"
		       " at virtual user address %p\n", fault_addr);
	else
		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
		       " at virtual kernel address %p\n", fault_addr);

	die("Oops", regs, error_code);
	do_exit(SIGKILL);
}

/*
 * Handle a low-address protection exception.  With the problem-state bit
 * set in the PSW this is treated as fatal; otherwise it is handled as a
 * kernel fault at address 0.
 */
static void do_low_address(struct pt_regs *regs, unsigned long error_code)
{
	/*
	 * In kernel mode a low-address protection hit means a NULL
	 * pointer write access.
	 */
	if (!(regs->psw.mask & PSW_MASK_PSTATE))
		goto kernel_fault;

	/* Low-address protection hit in user mode 'cannot happen'. */
	die ("Low-address protection", regs, error_code);
	do_exit(SIGKILL);

kernel_fault:
	do_no_context(regs, error_code, 0);
}

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
/*
 * Called with mm->mmap_sem held for reading; the semaphore is dropped
 * first.  Returns 1 (semaphore re-acquired) when the caller should retry
 * the fault, which is only done for the init task.  For every other task
 * the process is reported as killed and 0 is returned if control comes
 * back at all.
 */
static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
			    unsigned long address)
{
	struct task_struct *task = current;
	struct mm_struct *mm = task->mm;

	up_read(&mm->mmap_sem);

	if (!is_init(task)) {
		printk("VM: killing process %s\n", task->comm);
		/* Fault came from user mode: terminate the task. */
		if (regs->psw.mask & PSW_MASK_PSTATE)
			do_exit(SIGKILL);
		/* Kernel mode: try an exception fixup or die. */
		do_no_context(regs, error_code, address);
		return 0;
	}

	/* init is never killed: let others run, then ask for a retry. */
	yield();
	down_read(&mm->mmap_sem);
	return 1;
}

/*
 * Called with the current mm's mmap_sem held for reading; releases it,
 * records the fault in the thread structure and forces SIGBUS on the
 * current task.  A fault taken in kernel mode is additionally passed to
 * do_no_context().
 */
static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
		      unsigned long address)
{
	struct task_struct *task = current;

	up_read(&task->mm->mmap_sem);

	/* The signal is sent for user and kernel mode faults alike. */
	task->thread.prot_addr = address;
	task->thread.trap_no = error_code;
	force_sig(SIGBUS, task);

	/* User mode: the signal is all that is needed. */
	if (regs->psw.mask & PSW_MASK_PSTATE)
		return;

	/* Kernel mode: handle exceptions or die. */
	do_no_context(regs, error_code, address);
}

Gerald Schaefer's avatar
Gerald Schaefer committed
246
247
248
249
250
251
#ifdef CONFIG_S390_EXEC_PROTECT
extern long sys_sigreturn(struct pt_regs *regs);
extern long sys_rt_sigreturn(struct pt_regs *regs);
extern long sys32_sigreturn(struct pt_regs *regs);
extern long sys32_rt_sigreturn(struct pt_regs *regs);

/*
 * Handle a fault on an instruction fetch from a non-executable mapping.
 *
 * The two-byte instruction at the PSW address is read without taking
 * page faults.  If it is 0x0a77 or 0x0aad, the matching (rt_)sigreturn
 * function is called directly (the 32-bit variants when the task has
 * TIF_31BIT set); any other instruction results in SIGSEGV.
 *
 * @mm:         address space whose mmap_sem the caller holds for reading
 * @regs:       register set at the time of the fault
 * @address:    failing address
 * @error_code: interruption code
 *
 * Returns -EFAULT, with mmap_sem still held, if the instruction could
 * not be read.  Returns 0 otherwise; in that case mmap_sem has been
 * released here.
 */
static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
			 unsigned long address, unsigned long error_code)
{
	u16 instruction;
	int rc;
#ifdef CONFIG_COMPAT
	int compat;
#endif

	pagefault_disable();
	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
	pagefault_enable();
	if (rc)
		return -EFAULT;

	up_read(&mm->mmap_sem);
	clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
#ifdef CONFIG_COMPAT
	compat = test_tsk_thread_flag(current, TIF_31BIT);
	if (compat && instruction == 0x0a77)
		sys32_sigreturn(regs);
	else if (compat && instruction == 0x0aad)
		sys32_rt_sigreturn(regs);
	else
#endif
	/* With CONFIG_COMPAT this "if" is the tail of the chain above. */
	if (instruction == 0x0a77)
		sys_sigreturn(regs);
	else if (instruction == 0x0aad)
		sys_rt_sigreturn(regs);
	else {
		current->thread.prot_addr = address;
		current->thread.trap_no = error_code;
		do_sigsegv(regs, error_code, SEGV_MAPERR, address);
	}
	return 0;
}
#endif /* CONFIG_S390_EXEC_PROTECT */

Linus Torvalds's avatar
Linus Torvalds committed
290
291
292
293
294
295
296
297
298
299
300
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *   04       Protection           ->  Write-Protection  (suprression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
301
static inline void
302
do_exception(struct pt_regs *regs, unsigned long error_code, int write)
Linus Torvalds's avatar
Linus Torvalds committed
303
{
304
305
306
307
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
308
	int space;
309
	int si_code;
Nick Piggin's avatar
Nick Piggin committed
310
	int fault;
Linus Torvalds's avatar
Linus Torvalds committed
311

312
	if (notify_page_fault(regs, error_code))
Michael Grundy's avatar
Michael Grundy committed
313
314
		return;

315
316
	tsk = current;
	mm = tsk->mm;
Linus Torvalds's avatar
Linus Torvalds committed
317

318
319
	/* get the failing address and the affected space */
	address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
320
	space = check_space(tsk);
Linus Torvalds's avatar
Linus Torvalds committed
321
322
323
324
325
326

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a 
	 * user context.
	 */
327
328
	if (unlikely(space == 0 || in_atomic() || !mm))
		goto no_context;
Linus Torvalds's avatar
Linus Torvalds committed
329
330
331
332
333
334
335
336

	/*
	 * When we get here, the fault happened in the current
	 * task's user address space, so we can switch on the
	 * interrupts again and then search the VMAs
	 */
	local_irq_enable();

337
	down_read(&mm->mmap_sem);
Linus Torvalds's avatar
Linus Torvalds committed
338

339
340
341
342
	si_code = SEGV_MAPERR;
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
Gerald Schaefer's avatar
Gerald Schaefer committed
343
344

#ifdef CONFIG_S390_EXEC_PROTECT
345
	if (unlikely((space == 2) && !(vma->vm_flags & VM_EXEC)))
Gerald Schaefer's avatar
Gerald Schaefer committed
346
347
348
349
350
351
352
353
		if (!signal_return(mm, regs, address, error_code))
			/*
			 * signal_return() has done an up_read(&mm->mmap_sem)
			 * if it returns 0.
			 */
			return;
#endif

354
355
356
357
358
359
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
Linus Torvalds's avatar
Linus Torvalds committed
360
361
362
363
364
365
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
366
	if (!write) {
Linus Torvalds's avatar
Linus Torvalds committed
367
368
369
370
371
372
373
374
375
376
377
378
379
380
		/* page not present, check vm flags */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
Nick Piggin's avatar
Nick Piggin committed
381
382
383
384
385
386
387
388
389
390
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM) {
			if (do_out_of_memory(regs, error_code, address))
				goto survive;
			return;
		} else if (fault & VM_FAULT_SIGBUS) {
			do_sigbus(regs, error_code, address);
			return;
		}
Linus Torvalds's avatar
Linus Torvalds committed
391
392
		BUG();
	}
Nick Piggin's avatar
Nick Piggin committed
393
394
395
396
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;
Linus Torvalds's avatar
Linus Torvalds committed
397
398
399
400
401
402

        up_read(&mm->mmap_sem);
	/*
	 * The instruction that caused the program check will
	 * be repeated. Don't signal single step via SIGTRAP.
	 */
403
	clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
Linus Torvalds's avatar
Linus Torvalds committed
404
405
406
407
408
409
410
        return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
411
	up_read(&mm->mmap_sem);
Linus Torvalds's avatar
Linus Torvalds committed
412

413
414
415
416
	/* User mode accesses just cause a SIGSEGV */
	if (regs->psw.mask & PSW_MASK_PSTATE) {
		tsk->thread.prot_addr = address;
		tsk->thread.trap_no = error_code;
Linus Torvalds's avatar
Linus Torvalds committed
417
		do_sigsegv(regs, error_code, si_code, address);
418
		return;
Linus Torvalds's avatar
Linus Torvalds committed
419
420
421
	}

no_context:
422
	do_no_context(regs, error_code, address);
Linus Torvalds's avatar
Linus Torvalds committed
423
424
}

425
426
/*
 * Entry point for protection exceptions.
 *
 * @regs:       register set at the time of the exception
 * @error_code: interruption information; its upper part (bits 16 and up)
 *              is subtracted from the PSW address
 *              NOTE(review): presumably the instruction length - confirm
 *
 * Low-address protection is handled separately; every other protection
 * exception is passed to do_exception() as a write fault with code 4.
 */
void __kprobes do_protection_exception(struct pt_regs *regs,
				       unsigned long error_code)
{
	/* Protection exception is suppressing, decrement psw address. */
	regs->psw.addr -= (error_code >> 16);
	/*
	 * Check for low-address protection.  This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!(S390_lowcore.trans_exc_code & 4))) {
		do_low_address(regs, error_code);
		return;
	}
	do_exception(regs, 4, 1);
}

442
/*
 * Entry point for address translation exceptions: forwards the low byte
 * of @error_code to do_exception() as a non-write fault.
 */
void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
{
	do_exception(regs, error_code & 0xff, 0);
}

#ifdef CONFIG_PFAULT 
/*
 * 'pfault' pseudo page faults routines.
 */
Heiko Carstens's avatar
Heiko Carstens committed
451
static ext_int_info_t ext_int_pfault;
Linus Torvalds's avatar
Linus Torvalds committed
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
static int pfault_disable = 0;

static int __init nopfault(char *str)
{
	pfault_disable = 1;
	return 1;
}

__setup("nopfault", nopfault);

/*
 * Parameter block whose address is passed to "diag 0x258" by
 * pfault_init() and pfault_fini().  Packed and 8-byte aligned.
 * NOTE(review): the per-field remarks below are read off the initializers
 * in those two functions - confirm against the DIAGNOSE 0x258 definition.
 */
typedef struct {
	__u16 refdiagc;		/* set to 0x258 by both callers */
	__u16 reffcode;		/* 0 in pfault_init(), 1 in pfault_fini() */
	__u16 refdwlen;		/* set to 5 by both callers */
	__u16 refversn;		/* set to 2 by both callers */
	__u64 refgaddr;		/* __LC_CURRENT in pfault_init() */
	__u64 refselmk;		/* 1ULL << 48 in pfault_init() */
	__u64 refcmpmk;		/* 1ULL << 48 in pfault_init() */
	__u64 reserved;		/* __PF_RES_FIELD in pfault_init() */
} __attribute__ ((packed, aligned(8))) pfault_refbk_t;
Linus Torvalds's avatar
Linus Torvalds committed
472
473
474
475
476
477
478
479

int pfault_init(void)
{
	pfault_refbk_t refbk =
		{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
		  __PF_RES_FIELD };
        int rc;

Heiko Carstens's avatar
Heiko Carstens committed
480
	if (!MACHINE_IS_VM || pfault_disable)
Linus Torvalds's avatar
Linus Torvalds committed
481
		return -1;
482
483
484
485
	asm volatile(
		"	diag	%1,%0,0x258\n"
		"0:	j	2f\n"
		"1:	la	%0,8\n"
Linus Torvalds's avatar
Linus Torvalds committed
486
		"2:\n"
487
488
		EX_TABLE(0b,1b)
		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
Linus Torvalds's avatar
Linus Torvalds committed
489
490
491
492
493
494
495
496
497
        __ctl_set_bit(0, 9);
        return rc;
}

void pfault_fini(void)
{
	pfault_refbk_t refbk =
	{ 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };

Heiko Carstens's avatar
Heiko Carstens committed
498
	if (!MACHINE_IS_VM || pfault_disable)
Linus Torvalds's avatar
Linus Torvalds committed
499
500
		return;
	__ctl_clear_bit(0,9);
501
502
	asm volatile(
		"	diag	%0,0,0x258\n"
Linus Torvalds's avatar
Linus Torvalds committed
503
		"0:\n"
504
505
		EX_TABLE(0b,0b)
		: : "a" (&refbk), "m" (refbk) : "cc");
Linus Torvalds's avatar
Linus Torvalds committed
506
507
}

508
/*
 * External interrupt handler registered for code 0x2603 by
 * pfault_irq_init().  @error_code is not used; everything is taken
 * from the lowcore.
 *
 * thread.pfault_wait tracks which of the two interrupts (initial or
 * completion) arrived first: the initial interrupt swaps in 1, the
 * completion interrupt swaps in -1, and whichever comes second resets
 * the field to 0.
 */
static void pfault_interrupt(__u16 error_code)
{
	struct task_struct *tsk;
	__u16 subcode;

	/*
	 * Get the external interruption subcode & pfault
	 * initial/completion signal bit. VM stores this
	 * in the 'cpu address' field associated with the
	 * external interrupt.
	 */
	subcode = S390_lowcore.cpu_addr;
	if ((subcode & 0xff00) != __SUBCODE_MASK)
		return;

	/*
	 * Get the token (= address of the task structure of the affected task).
	 */
	tsk = *(struct task_struct **) __LC_PFAULT_INTPARM;

	if (subcode & 0x0080) {
		/* signal bit is set -> a page has been swapped in by VM */
		if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
			/* Initial interrupt was faster than the completion
			 * interrupt. pfault_wait is valid. Set pfault_wait
			 * back to zero and wake up the process. This can
			 * safely be done because the task is still sleeping
			 * and can't produce new pfaults. */
			tsk->thread.pfault_wait = 0;
			wake_up_process(tsk);
			put_task_struct(tsk);
		}
	} else {
		/* signal bit not set -> a real page is missing. */
		get_task_struct(tsk);
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
			/* Completion interrupt was faster than the initial
			 * interrupt (swapped in a -1 for pfault_wait). Set
			 * pfault_wait back to zero and exit. This can be
			 * done safely because tsk is running in kernel
			 * mode and can't produce new pfaults. */
			tsk->thread.pfault_wait = 0;
			set_task_state(tsk, TASK_RUNNING);
			put_task_struct(tsk);
		} else
			set_tsk_need_resched(tsk);
	}
}

Heiko Carstens's avatar
Heiko Carstens committed
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
/*
 * Boot-time setup of pfault handling.  Only done when running under VM:
 * the handler for external interrupt 0x2603 is registered (failure to
 * do so is fatal) and pfault_init() is called.  If pfault_init() fails,
 * pfault is marked disabled and the handler is unregistered again.
 */
void __init pfault_irq_init(void)
{
	if (!MACHINE_IS_VM)
		return;

	/* Try to get pfault pseudo page faults going. */
	if (register_early_external_interrupt(0x2603, pfault_interrupt,
					      &ext_int_pfault) != 0)
		panic("Couldn't request external interrupt 0x2603");

	if (pfault_init() != 0) {
		/* Tough luck, no pfault. */
		pfault_disable = 1;
		unregister_early_external_interrupt(0x2603, pfault_interrupt,
						    &ext_int_pfault);
	}
}
#endif