/*
 *  arch/s390/mm/fault.c
 *
 *  S390 version
 *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
#include <asm/mmu_context.h>
#include <asm/compat.h>
#include "../kernel/entry.h"

#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
#define __SUBCODE_MASK 0x0200
#define __PF_RES_FIELD 0ULL
#else /* CONFIG_64BIT */
#define __FAIL_ADDR_MASK -4096L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
#endif /* CONFIG_64BIT */

#define VM_FAULT_BADCONTEXT	0x010000
#define VM_FAULT_BADMAP		0x020000
#define VM_FAULT_BADACCESS	0x040000

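/*
 * store_indication is set once at boot by fault_init(): if facility bits
 * 2 and 75 are available, the translation exception code carries
 * fetch/store indication bits, and do_exception() uses this mask to
 * recognize faults caused by store accesses and handle them as writes.
 */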
static unsigned long store_indication;

void fault_init(void)
{
	if (test_facility(2) && test_facility(75))
		store_indication = 0xc00;
}

static inline int notify_page_fault(struct pt_regs *regs)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}
	return ret;
}


/*
 * Unlock any spinlocks which will prevent us from getting the
 * message out.
 */
void bust_spinlocks(int yes)
{
	if (yes) {
		oops_in_progress = 1;
	} else {
		int loglevel_save = console_loglevel;
		console_unblank();
		oops_in_progress = 0;
		/*
		 * OK, the message is on the console.  Now we call printk()
		 * without oops_in_progress set so that printk will give klogd
		 * a poke.  Hold onto your hats...
		 */
		console_loglevel = 15;
		printk(" ");
		console_loglevel = loglevel_save;
	}
}

/*
 * Returns the address space associated with the fault.
 * Returns 0 for kernel space and 1 for user space.
 */
static inline int user_space_fault(unsigned long trans_exc_code)
{
	/*
	 * The lowest two bits of the translation exception
	 * identification indicate which paging table was used.
	 */
	trans_exc_code &= 3;
	if (trans_exc_code == 2)
		/* Access via secondary space, set_fs setting decides */
		return current->thread.mm_segment.ar4;
	if (user_mode == HOME_SPACE_MODE)
		/* User space if the access has been done via home space. */
		return trans_exc_code == 3;
	/*
	 * If the user space is not the home space the kernel runs in home
	 * space. Access via secondary space has already been covered,
	 * access via primary space or access register is from user space
	 * and access via home space is from the kernel.
	 */
	return trans_exc_code != 3;
}

static inline void report_user_fault(struct pt_regs *regs, long int_code,
				     int signr, unsigned long address)
{
	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
		return;
	if (!unhandled_signal(current, signr))
		return;
	if (!printk_ratelimit())
		return;
	printk("User process fault: interruption code 0x%lX ", int_code);
	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
	printk("\n");
	printk("failing address: %lX\n", address);
	show_regs(regs);
}

/*
 * Send SIGSEGV to task.  This is an external routine
 * to keep the stack usage of do_page_fault small.
 */
static noinline void do_sigsegv(struct pt_regs *regs, long int_code,
				int si_code, unsigned long trans_exc_code)
{
	struct siginfo si;
	unsigned long address;

	address = trans_exc_code & __FAIL_ADDR_MASK;
	current->thread.prot_addr = address;
	current->thread.trap_no = int_code;
	report_user_fault(regs, int_code, SIGSEGV, address);
	si.si_signo = SIGSEGV;
	si.si_code = si_code;
	si.si_addr = (void __user *) address;
	force_sig_info(SIGSEGV, &si, current);
}

static noinline void do_no_context(struct pt_regs *regs, long int_code,
				   unsigned long trans_exc_code)
{
	const struct exception_table_entry *fixup;
	unsigned long address;

	/* Are we prepared to handle this kernel fault?  */
	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
	if (fixup) {
		regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	address = trans_exc_code & __FAIL_ADDR_MASK;
	if (!user_space_fault(trans_exc_code))
		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
		       " at virtual kernel address %p\n", (void *)address);
	else
		printk(KERN_ALERT "Unable to handle kernel paging request"
		       " at virtual user address %p\n", (void *)address);

	die("Oops", regs, int_code);
	do_exit(SIGKILL);
}

static noinline void do_low_address(struct pt_regs *regs, long int_code,
				    unsigned long trans_exc_code)
{
	/* Low-address protection hit in kernel mode means
	   NULL pointer write access in kernel mode.  */
	if (regs->psw.mask & PSW_MASK_PSTATE) {
		/* Low-address protection hit in user mode 'cannot happen'. */
		die ("Low-address protection", regs, int_code);
		do_exit(SIGKILL);
	}

	do_no_context(regs, int_code, trans_exc_code);
}

static noinline void do_sigbus(struct pt_regs *regs, long int_code,
			       unsigned long trans_exc_code)
{
	struct task_struct *tsk = current;
	unsigned long address;
	struct siginfo si;

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	address = trans_exc_code & __FAIL_ADDR_MASK;
	tsk->thread.prot_addr = address;
	tsk->thread.trap_no = int_code;
	si.si_signo = SIGBUS;
	si.si_errno = 0;
	si.si_code = BUS_ADRERR;
	si.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &si, tsk);
}

#ifdef CONFIG_S390_EXEC_PROTECT
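/*
 * With execute protection enabled the signal return trampoline on the
 * user stack is not executable. A fault on the svc instructions 0x0a77
 * (svc 119, sigreturn) and 0x0aad (svc 173, rt_sigreturn) is therefore
 * turned into the matching sigreturn call instead of a SIGSEGV.
 */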
static noinline int signal_return(struct pt_regs *regs, long int_code,
				  unsigned long trans_exc_code)
{
	u16 instruction;
	int rc;

	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);

	if (!rc && instruction == 0x0a77) {
		clear_tsk_thread_flag(current, TIF_PER_TRAP);
		if (is_compat_task())
			sys32_sigreturn();
		else
			sys_sigreturn();
	} else if (!rc && instruction == 0x0aad) {
		clear_tsk_thread_flag(current, TIF_PER_TRAP);
		if (is_compat_task())
			sys32_rt_sigreturn();
		else
			sys_rt_sigreturn();
	} else
		do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
	return 0;
}
#endif /* CONFIG_S390_EXEC_PROTECT */

static noinline void do_fault_error(struct pt_regs *regs, long int_code,
				    unsigned long trans_exc_code, int fault)
{
	int si_code;

	switch (fault) {
	case VM_FAULT_BADACCESS:
#ifdef CONFIG_S390_EXEC_PROTECT
		if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
		    (trans_exc_code & 3) == 0) {
			signal_return(regs, int_code, trans_exc_code);
			break;
		}
#endif /* CONFIG_S390_EXEC_PROTECT */
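		/* fall through */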
	case VM_FAULT_BADMAP:
		/* Bad memory access. Check if it is kernel or user space. */
		if (regs->psw.mask & PSW_MASK_PSTATE) {
			/* User mode accesses just cause a SIGSEGV */
			si_code = (fault == VM_FAULT_BADMAP) ?
				SEGV_MAPERR : SEGV_ACCERR;
			do_sigsegv(regs, int_code, si_code, trans_exc_code);
			return;
		}
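		/* fall through */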
	case VM_FAULT_BADCONTEXT:
		do_no_context(regs, int_code, trans_exc_code);
		break;
	default: /* fault & VM_FAULT_ERROR */
		if (fault & VM_FAULT_OOM)
			pagefault_out_of_memory();
		else if (fault & VM_FAULT_SIGBUS) {
			/* Kernel mode? Handle exceptions or die */
			if (!(regs->psw.mask & PSW_MASK_PSTATE))
				do_no_context(regs, int_code, trans_exc_code);
			else
				do_sigbus(regs, int_code, trans_exc_code);
		} else
			BUG();
		break;
	}
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suppression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
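/*
 * do_exception() returns 0 if the fault was resolved, otherwise one of
 * the VM_FAULT_* codes (including the private VM_FAULT_BAD* values
 * defined above) which the program check handlers pass to
 * do_fault_error().
 */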
static inline int do_exception(struct pt_regs *regs, int access,
			       unsigned long trans_exc_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int fault, write;

	if (notify_page_fault(regs))
		return 0;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a
	 * user context.
	 */
	fault = VM_FAULT_BADCONTEXT;
	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
		goto out;

	address = trans_exc_code & __FAIL_ADDR_MASK;
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
	down_read(&mm->mmap_sem);

	fault = VM_FAULT_BADMAP;
	vma = find_vma(mm, address);
	if (!vma)
		goto out_up;

	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_up;
		if (expand_stack(vma, address))
			goto out_up;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
	fault = VM_FAULT_BADACCESS;
	if (unlikely(!(vma->vm_flags & access)))
		goto out_up;

	if (is_vm_hugetlb_page(vma))
		address &= HPAGE_MASK;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	write = (access == VM_WRITE ||
		 (trans_exc_code & store_indication) == 0x400) ?
		FAULT_FLAG_WRITE : 0;
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR))
		goto out_up;

	if (fault & VM_FAULT_MAJOR) {
		tsk->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
				     regs, address);
	} else {
		tsk->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
				     regs, address);
	}
	/*
	 * The instruction that caused the program check will
	 * be repeated. Don't signal single step via SIGTRAP.
	 */
	clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
	fault = 0;
out_up:
	up_read(&mm->mmap_sem);
out:
	return fault;
}

void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
				       unsigned long trans_exc_code)
{
	int fault;

	/* Protection exception is suppressing, decrement psw address. */
	regs->psw.addr -= (pgm_int_code >> 16);
	/*
	 * Check for low-address protection.  This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!(trans_exc_code & 4))) {
		do_low_address(regs, pgm_int_code, trans_exc_code);
		return;
	}
	fault = do_exception(regs, VM_WRITE, trans_exc_code);
	if (unlikely(fault))
		do_fault_error(regs, 4, trans_exc_code, fault);
}

void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code,
				unsigned long trans_exc_code)
{
	int access, fault;

	access = VM_READ | VM_EXEC | VM_WRITE;
#ifdef CONFIG_S390_EXEC_PROTECT
	if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
	    (trans_exc_code & 3) == 0)
		access = VM_EXEC;
#endif
	fault = do_exception(regs, access, trans_exc_code);
	if (unlikely(fault))
		do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
}

#ifdef CONFIG_64BIT
void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code,
				 unsigned long trans_exc_code)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
		goto no_context;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK);
	up_read(&mm->mmap_sem);

	if (vma) {
		update_mm(mm, current);
		return;
	}

	/* User mode accesses just cause a SIGSEGV */
	if (regs->psw.mask & PSW_MASK_PSTATE) {
		do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code);
		return;
	}

no_context:
	do_no_context(regs, pgm_int_code, trans_exc_code);
}
#endif

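/*
 * __handle_fault() is used by the uaccess code to resolve a fault on a
 * user space address: it builds a minimal pt_regs and reuses
 * do_exception(), returning -EFAULT if the fault cannot be resolved.
 */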
int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
{
	struct pt_regs regs;
	int access, fault;

	regs.psw.mask = psw_kernel_bits;
	if (!irqs_disabled())
		regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
	regs.psw.addr = (unsigned long) __builtin_return_address(0);
	regs.psw.addr |= PSW_ADDR_AMODE;
	uaddr &= PAGE_MASK;
	access = write ? VM_WRITE : VM_READ;
	fault = do_exception(&regs, access, uaddr | 2);
	if (unlikely(fault)) {
		if (fault & VM_FAULT_OOM) {
			pagefault_out_of_memory();
			fault = 0;
		} else if (fault & VM_FAULT_SIGBUS)
			do_sigbus(&regs, pgm_int_code, uaddr);
	}
	return fault ? -EFAULT : 0;
}

#ifdef CONFIG_PFAULT
/*
 * 'pfault' pseudo page faults routines.
 */
static int pfault_disable;

static int __init nopfault(char *str)
{
	pfault_disable = 1;
	return 1;
}

__setup("nopfault", nopfault);
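
/*
 * Pseudo page fault ("pfault") handshake with z/VM: the kernel registers
 * via DIAG X'258' so that a host page fault is reported through external
 * interrupt 0x2603 instead of stopping the virtual CPU. The initial
 * interrupt puts the faulting task to sleep, the completion interrupt
 * wakes it up once z/VM has resolved the fault. pfault_refbk_t below is
 * the parameter block passed to DIAG X'258'.
 */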

typedef struct {
	__u16 refdiagc;
	__u16 reffcode;
	__u16 refdwlen;
	__u16 refversn;
	__u64 refgaddr;
	__u64 refselmk;
	__u64 refcmpmk;
	__u64 reserved;
} __attribute__ ((packed, aligned(8))) pfault_refbk_t;

int pfault_init(void)
{
	pfault_refbk_t refbk =
		{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
		  __PF_RES_FIELD };
        int rc;

	if (!MACHINE_IS_VM || pfault_disable)
		return -1;
	asm volatile(
		"	diag	%1,%0,0x258\n"
		"0:	j	2f\n"
		"1:	la	%0,8\n"
		"2:\n"
		EX_TABLE(0b,1b)
		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
        __ctl_set_bit(0, 9);
        return rc;
}

void pfault_fini(void)
{
	pfault_refbk_t refbk =
	{ 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };

	if (!MACHINE_IS_VM || pfault_disable)
		return;
	__ctl_clear_bit(0,9);
	asm volatile(
		"	diag	%0,0,0x258\n"
		"0:\n"
		EX_TABLE(0b,0b)
		: : "a" (&refbk), "m" (refbk) : "cc");
}

static void pfault_interrupt(unsigned int ext_int_code,
			     unsigned int param32, unsigned long param64)
{
	struct task_struct *tsk;
	__u16 subcode;

	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
	/*
	 * Get the external interruption subcode & pfault
	 * initial/completion signal bit. VM stores this 
	 * in the 'cpu address' field associated with the
         * external interrupt. 
	 */
	subcode = ext_int_code >> 16;
	if ((subcode & 0xff00) != __SUBCODE_MASK)
		return;

	/*
	 * Get the token (= address of the task structure of the affected task).
	 */
#ifdef CONFIG_64BIT
	tsk = *(struct task_struct **) param64;
#else
	tsk = *(struct task_struct **) param32;
#endif

	if (subcode & 0x0080) {
		/* signal bit is set -> a page has been swapped in by VM */
		if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
			/* Initial interrupt was faster than the completion
			 * interrupt. pfault_wait is valid. Set pfault_wait
			 * back to zero and wake up the process. This can
			 * safely be done because the task is still sleeping
			 * and can't produce new pfaults. */
			tsk->thread.pfault_wait = 0;
			wake_up_process(tsk);
			put_task_struct(tsk);
		}
	} else {
		/* signal bit not set -> a real page is missing. */
		get_task_struct(tsk);
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
			/* Completion interrupt was faster than the initial
			 * interrupt (swapped in a -1 for pfault_wait). Set
			 * pfault_wait back to zero and exit. This can be
			 * done safely because tsk is running in kernel 
			 * mode and can't produce new pfaults. */
			tsk->thread.pfault_wait = 0;
			set_task_state(tsk, TASK_RUNNING);
			put_task_struct(tsk);
		} else
			set_tsk_need_resched(tsk);
	}
}

static int __init pfault_irq_init(void)
{
	int rc;

	if (!MACHINE_IS_VM)
		return 0;
	/*
	 * Try to get pfault pseudo page faults going.
	 */
	rc = register_external_interrupt(0x2603, pfault_interrupt);
	if (rc) {
		pfault_disable = 1;
		return rc;
	}
	if (pfault_init() == 0)
		return 0;

	/* Tough luck, no pfault. */
	pfault_disable = 1;
	unregister_external_interrupt(0x2603, pfault_interrupt);
	return 0;
}
early_initcall(pfault_irq_init);

#endif