entry_64.S 34.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 * 
 * Normal syscalls and interrupts don't save a full stack frame, this is 
 * only done for syscall tracing, signals or fork/exec et.al.
 * 
 * A note on terminology:	 
 * - top of stack: Architecture defined interrupt frame from SS to RIP 
 * at the top of the kernel process stack.	
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but with all registers saved.
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers
 * are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
Linus Torvalds's avatar
Linus Torvalds committed
38
39
40
41
42
43
44
45
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
46
#include <asm/asm-offsets.h>
Linus Torvalds's avatar
Linus Torvalds committed
47
48
49
50
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
51
#include <asm/page.h>
52
#include <asm/irqflags.h>
53
#include <asm/paravirt.h>
54
#include <asm/ftrace.h>
Linus Torvalds's avatar
Linus Torvalds committed
55

56
57
58
59
60
61
/*
 * Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.
 *
 * AUDIT_ARCH_X86_64 is loaded into %edi as the first argument of
 * audit_syscall_entry() in the auditsys fast path further down.
 * NOTE(review): the two flag values are presumably duplicates of the
 * definitions in <linux/audit.h> and must stay in sync -- confirm.
 * Defining the helper macros after AUDIT_ARCH_X86_64 is fine: the
 * preprocessor expands them only at the point of use.
 */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE	   0x40000000

Linus Torvalds's avatar
Linus Torvalds committed
62
63
	.code64

64
#ifdef CONFIG_FTRACE
65
66
67
68
69
70
71
72
73
74
75
76
77
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)

	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
78
	subq $MCOUNT_INSN_SIZE, %rdi
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

.globl mcount_call
mcount_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	retq
END(mcount)

ENTRY(ftrace_caller)

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
110
	subq $MCOUNT_INSN_SIZE, %rdi
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
151
	subq $MCOUNT_INSN_SIZE, %rdi
152
153
154
155
156
157
158
159
160
161
162
163
164
165

	call   *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
166
167
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */
168

169
#ifndef CONFIG_PREEMPT
Linus Torvalds's avatar
Linus Torvalds committed
170
171
#define retint_kernel retint_restore_args
#endif	
172

173
#ifdef CONFIG_PARAVIRT
174
ENTRY(native_usergs_sysret64)
175
176
177
178
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */

179
180
181
182
183
184
185
186
187
188

/*
 * TRACE_IRQS_IRETQ - irq-flags tracing hook used just before an iretq
 * style return.
 *
 * offset: bias subtracted from the EFLAGS frame offset (default
 *         ARGOFFSET).
 *
 * Expands to nothing unless CONFIG_TRACE_IRQFLAGS is set.  Otherwise it
 * tests bit 9 (the interrupt-enable bit) of the saved EFLAGS word and
 * invokes TRACE_IRQS_ON only when that bit is set, i.e. when the frame
 * being returned to runs with interrupts enabled.
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f				/* bit clear: nothing to trace */
	TRACE_IRQS_ON
1:
#endif
.endm

Linus Torvalds's avatar
Linus Torvalds committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*
 * C code is not supposed to know about the undefined top of stack. Every
 * time a C function with a pt_regs argument is called from the SYSCALL
 * based fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */        	
		
	/*
	 * FIXUP_TOP_OF_STACK tmp
	 *
	 * Fill in the top-of-stack slots that the SYSCALL entry path leaves
	 * undefined, so the frame can be handed to C code as pt_regs.
	 * Expects %rsp at FRAMEEND.  \tmp is a scratch register; on exit it
	 * holds the value that was copied into the EFLAGS slot.
	 */
	.macro FIXUP_TOP_OF_STACK tmp
	/* constant slots: user selectors, and -1 in the RCX slot */
	movq	$__USER_CS,CS(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$-1,RCX(%rsp)
	/* user stack pointer that system_call stashed in the PDA */
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	/* the saved R11 slot carries the user eflags */
	movq	R11(%rsp),\tmp
	movq	\tmp,EFLAGS(%rsp)
	.endm

	/*
	 * RESTORE_TOP_OF_STACK tmp,offset=0
	 *
	 * Counterpart of FIXUP_TOP_OF_STACK: copy the RSP and EFLAGS slots
	 * of the frame back into %gs:pda_oldrsp and the R11 slot, so that
	 * any change C code made to pt_regs is picked up by the syscall
	 * return path.  \offset is subtracted from the frame offsets
	 * (ret_from_fork passes ARGOFFSET).  \tmp is a scratch register and
	 * is left holding the EFLAGS value.
	 */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq   RSP-\offset(%rsp),\tmp
	movq   \tmp,%gs:pda_oldrsp	/* user stack pointer */
	movq   EFLAGS-\offset(%rsp),\tmp
	movq   \tmp,R11-\offset(%rsp)	/* user eflags travel in r11 */
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
217
	xorl %eax, %eax
218
	pushq $__KERNEL_DS /* ss */
Linus Torvalds's avatar
Linus Torvalds committed
219
	CFI_ADJUST_CFA_OFFSET	8
220
	/*CFI_REL_OFFSET	ss,0*/
Linus Torvalds's avatar
Linus Torvalds committed
221
222
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
223
	CFI_REL_OFFSET	rsp,0
Linus Torvalds's avatar
Linus Torvalds committed
224
225
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
226
	/*CFI_REL_OFFSET	rflags,0*/
Linus Torvalds's avatar
Linus Torvalds committed
227
228
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
229
	/*CFI_REL_OFFSET	cs,0*/
Linus Torvalds's avatar
Linus Torvalds committed
230
231
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
232
	CFI_REL_OFFSET	rip,0
Linus Torvalds's avatar
Linus Torvalds committed
233
234
235
236
237
238
239
240
241
	pushq	%rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	/*
	 * UNFAKE_STACK_FRAME - discard the six quadwords pushed by
	 * FAKE_STACK_FRAME (ss, rsp, eflags, cs, rip, orig rax) and move
	 * the CFA annotation back by the same 48 bytes.
	 */
	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

242
243
244
	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
245
	CFI_SIGNAL_FRAME
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
Linus Torvalds's avatar
Linus Torvalds committed
270
271
272
273
274
275
276
	.endm
/*
 * A newly forked process directly context switches into this.
 */ 	
/* rdi:	prev */	
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
277
278
279
280
	/*
	 * Reset the flags register from kernel_eflags.  This is 64-bit
	 * code, so both the push and the popf move 8 bytes; the CFA
	 * annotation must therefore be adjusted by 8 (not 4) to keep the
	 * unwind information correct between the two instructions.
	 */
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds's avatar
Linus Torvalds committed
281
282
	call schedule_tail
	GET_THREAD_INFO(%rcx)
Glauber Costa's avatar
Glauber Costa committed
283
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
284
285
286
287
288
	jnz rff_trace
rff_action:	
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
Glauber Costa's avatar
Glauber Costa committed
289
	testl $_TIF_IA32,TI_flags(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
290
291
292
293
294
295
296
297
298
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)	
	jmp rff_action
	CFI_ENDPROC
299
END(ret_from_fork)
Linus Torvalds's avatar
Linus Torvalds committed
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
		
/*
 * Register setup:	
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3 
 * rsi  arg1
 * rdx  arg2	
 * r10  arg3 	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched. 		
 * 
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *      and report it properly in ps. Unfortunately we haven't.
326
327
328
329
 *
 * When user can change the frames always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
Linus Torvalds's avatar
Linus Torvalds committed
330
331
332
 */ 			 		

ENTRY(system_call)
333
	CFI_STARTPROC	simple
334
	CFI_SIGNAL_FRAME
335
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
336
337
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
338
339
340
341
342
343
344
345
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

Linus Torvalds's avatar
Linus Torvalds committed
346
347
	movq	%rsp,%gs:pda_oldrsp 
	movq	%gs:pda_kernelstack,%rsp
348
349
350
351
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
352
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
353
354
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
355
356
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
Linus Torvalds's avatar
Linus Torvalds committed
357
	GET_THREAD_INFO(%rcx)
358
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
359
	jnz tracesys
360
system_call_fastpath:
Linus Torvalds's avatar
Linus Torvalds committed
361
362
363
364
365
366
367
368
369
370
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack. 
 */		
ret_from_sys_call:
371
	movl $_TIF_ALLWORK_MASK,%edi
Linus Torvalds's avatar
Linus Torvalds committed
372
373
	/* edi:	flagmask */
sysret_check:		
374
	LOCKDEP_SYS_EXIT
Linus Torvalds's avatar
Linus Torvalds committed
375
	GET_THREAD_INFO(%rcx)
376
	DISABLE_INTERRUPTS(CLBR_NONE)
377
	TRACE_IRQS_OFF
Glauber Costa's avatar
Glauber Costa committed
378
	movl TI_flags(%rcx),%edx
Linus Torvalds's avatar
Linus Torvalds committed
379
380
	andl %edi,%edx
	jnz  sysret_careful 
381
	CFI_REMEMBER_STATE
382
383
384
385
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
Linus Torvalds's avatar
Linus Torvalds committed
386
	movq RIP-ARGOFFSET(%rsp),%rcx
387
	CFI_REGISTER	rip,rcx
Linus Torvalds's avatar
Linus Torvalds committed
388
	RESTORE_ARGS 0,-ARG_SKIP,1
389
	/*CFI_REGISTER	rflags,r11*/
390
	movq	%gs:pda_oldrsp, %rsp
391
	USERGS_SYSRET64
Linus Torvalds's avatar
Linus Torvalds committed
392

393
	CFI_RESTORE_STATE
Linus Torvalds's avatar
Linus Torvalds committed
394
395
396
397
398
	/* Handle reschedules */
	/* edx:	work, edi: workmask */	
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
399
	TRACE_IRQS_ON
400
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
401
	pushq %rdi
402
	CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds's avatar
Linus Torvalds committed
403
404
	call schedule
	popq  %rdi
405
	CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds's avatar
Linus Torvalds committed
406
407
408
409
	jmp sysret_check

	/* Handle a signal */ 
sysret_signal:
410
	TRACE_IRQS_ON
411
	ENABLE_INTERRUPTS(CLBR_NONE)
412
413
414
415
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
416
	/* edx:	work flags (arg3) */
Linus Torvalds's avatar
Linus Torvalds committed
417
418
419
420
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call ptregscall_common
421
	movl $_TIF_WORK_MASK,%edi
422
423
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
424
	DISABLE_INTERRUPTS(CLBR_NONE)
425
	TRACE_IRQS_OFF
426
	jmp int_with_check
Linus Torvalds's avatar
Linus Torvalds committed
427
	
428
429
430
431
badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 *
	 * Reached from tracesys when no syscall-entry work flag other than
	 * _TIF_SYSCALL_AUDIT is set.  The syscall number (%rax) and the
	 * first four syscall arguments are shuffled into the C argument
	 * registers.  The moves are ordered so that every source register
	 * is read before it is overwritten -- do not reorder them.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0		/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit.  Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 *
	 * The syscall return value is read back from the RAX slot of the
	 * saved frame rather than taken from %rax: this label is reached via
	 * sysret_check, which can be re-entered after "call schedule" in
	 * sysret_careful, and %rax is call-clobbered, so it may no longer
	 * hold the return value by the time we get here.
	 */
sysret_audit:
	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq $0,%rsi		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%edi		/* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif	/* CONFIG_AUDITSYSCALL */

Linus Torvalds's avatar
Linus Torvalds committed
465
466
	/* Do syscall tracing */
tracesys:			 
467
468
469
470
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
Linus Torvalds's avatar
Linus Torvalds committed
471
	SAVE_REST
Roland McGrath's avatar
Roland McGrath committed
472
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
Linus Torvalds's avatar
Linus Torvalds committed
473
474
475
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
476
477
478
479
480
481
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
Linus Torvalds's avatar
Linus Torvalds committed
482
483
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
Roland McGrath's avatar
Roland McGrath committed
484
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
Linus Torvalds's avatar
Linus Torvalds committed
485
486
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
Roland McGrath's avatar
Roland McGrath committed
487
	movq %rax,RAX-ARGOFFSET(%rsp)
488
	/* Use IRET because user could have changed frame */
Linus Torvalds's avatar
Linus Torvalds committed
489
490
491
492
		
/* 
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
493
494
495
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
496
	DISABLE_INTERRUPTS(CLBR_NONE)
497
	TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
498
499
500
501
502
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
503
	LOCKDEP_SYS_EXIT_IRQ
Linus Torvalds's avatar
Linus Torvalds committed
504
	GET_THREAD_INFO(%rcx)
Glauber Costa's avatar
Glauber Costa committed
505
	movl TI_flags(%rcx),%edx
Linus Torvalds's avatar
Linus Torvalds committed
506
507
	andl %edi,%edx
	jnz   int_careful
Glauber Costa's avatar
Glauber Costa committed
508
	andl    $~TS_COMPAT,TI_status(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
509
510
511
512
513
514
515
516
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
517
	TRACE_IRQS_ON
518
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
519
	pushq %rdi
520
	CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds's avatar
Linus Torvalds committed
521
522
	call schedule
	popq %rdi
523
	CFI_ADJUST_CFA_OFFSET -8
524
	DISABLE_INTERRUPTS(CLBR_NONE)
525
	TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
526
527
528
529
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
530
	TRACE_IRQS_ON
531
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
532
533
	SAVE_REST
	/* Check for syscall exit trace */	
534
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
Linus Torvalds's avatar
Linus Torvalds committed
535
536
	jz int_signal
	pushq %rdi
537
	CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds's avatar
Linus Torvalds committed
538
539
540
	leaq 8(%rsp),%rdi	# &ptregs -> arg1	
	call syscall_trace_leave
	popq %rdi
541
	CFI_ADJUST_CFA_OFFSET -8
542
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
Linus Torvalds's avatar
Linus Torvalds committed
543
544
545
	jmp int_restore_rest
	
int_signal:
546
	testl $_TIF_DO_NOTIFY_MASK,%edx
Linus Torvalds's avatar
Linus Torvalds committed
547
548
549
550
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
Roland McGrath's avatar
Roland McGrath committed
551
1:	movl $_TIF_WORK_MASK,%edi
Linus Torvalds's avatar
Linus Torvalds committed
552
553
int_restore_rest:
	RESTORE_REST
554
	DISABLE_INTERRUPTS(CLBR_NONE)
555
	TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
556
557
	jmp int_with_check
	CFI_ENDPROC
558
END(system_call)
Linus Torvalds's avatar
Linus Torvalds committed
559
560
561
562
563
564
565
566
567
568
569
		
/* 
 * Certain special system calls that need to save a complete full stack frame.
 */ 								
	
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
	jmp	ptregscall_common
570
END(\label)
Linus Torvalds's avatar
Linus Torvalds committed
571
572
	.endm

573
574
	CFI_STARTPROC

Linus Torvalds's avatar
Linus Torvalds committed
575
576
577
578
579
580
581
582
	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
583
584
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
Linus Torvalds's avatar
Linus Torvalds committed
585
586
	SAVE_REST
	movq %r11, %r15
587
	CFI_REGISTER rip, r15
Linus Torvalds's avatar
Linus Torvalds committed
588
589
590
591
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
592
	CFI_REGISTER rip, r11
Linus Torvalds's avatar
Linus Torvalds committed
593
594
	RESTORE_REST
	pushq %r11
595
596
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
Linus Torvalds's avatar
Linus Torvalds committed
597
598
	ret
	CFI_ENDPROC
599
END(ptregscall_common)
Linus Torvalds's avatar
Linus Torvalds committed
600
601
602
603
	
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
604
605
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
Linus Torvalds's avatar
Linus Torvalds committed
606
607
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
608
	movq %rsp, %rcx
Linus Torvalds's avatar
Linus Torvalds committed
609
610
611
612
613
614
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
615
END(stub_execve)
Linus Torvalds's avatar
Linus Torvalds committed
616
617
618
619
620
621
622
	
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */                
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
623
624
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
Linus Torvalds's avatar
Linus Torvalds committed
625
626
627
628
629
630
631
632
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
633
END(stub_rt_sigreturn)
Linus Torvalds's avatar
Linus Torvalds committed
634

635
636
637
638
639
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
640
	CFI_SIGNAL_FRAME
641
642
643
644
645
646
647
648
649
650
651
652
653
654
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

Linus Torvalds's avatar
Linus Torvalds committed
655
656
657
658
659
660
661
662
663
664
665
666
667
/* 
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *	
 * Entry runs with interrupts off.	
 */ 

/* 0(%rsp): interrupt number */ 
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
668
669
670
671
672
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
Linus Torvalds's avatar
Linus Torvalds committed
673
674
	testl $3,CS(%rdi)
	je 1f
675
	SWAPGS
676
677
678
679
680
681
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	    much work) */
1:	incl	%gs:pda_irqcount
682
	cmoveq %gs:pda_irqstackptr,%rsp
683
	push    %rbp			# backlink for old unwinder
684
685
686
687
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
688
689
690
691
	call \func
	.endm

ENTRY(common_interrupt)
692
	XCPT_FRAME
Linus Torvalds's avatar
Linus Torvalds committed
693
694
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
695
ret_from_intr:
696
	DISABLE_INTERRUPTS(CLBR_NONE)
697
	TRACE_IRQS_OFF
698
	decl %gs:pda_irqcount
699
	leaveq
700
	CFI_DEF_CFA_REGISTER	rsp
701
	CFI_ADJUST_CFA_OFFSET	-8
702
exit_intr:
Linus Torvalds's avatar
Linus Torvalds committed
703
704
705
706
707
708
709
710
711
712
713
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel
	
	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */		
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
714
retint_check:
715
	LOCKDEP_SYS_EXIT_IRQ
Glauber Costa's avatar
Glauber Costa committed
716
	movl TI_flags(%rcx),%edx
Linus Torvalds's avatar
Linus Torvalds committed
717
	andl %edi,%edx
718
	CFI_REMEMBER_STATE
Linus Torvalds's avatar
Linus Torvalds committed
719
	jnz  retint_careful
720
721

retint_swapgs:		/* return to user-space */
722
723
724
	/*
	 * The iretq could re-enable interrupts:
	 */
725
	DISABLE_INTERRUPTS(CLBR_ANY)
726
	TRACE_IRQS_IRETQ
727
	SWAPGS
728
729
	jmp restore_args

730
retint_restore_args:	/* return to kernel space */
731
	DISABLE_INTERRUPTS(CLBR_ANY)
732
733
734
735
736
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
Ingo Molnar's avatar
Ingo Molnar committed
737
738
	RESTORE_ARGS 0,8,0

739
irq_return:
740
	INTERRUPT_RETURN
Ingo Molnar's avatar
Ingo Molnar committed
741
742
743
744
745
746

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
747
ENTRY(native_iret)
Linus Torvalds's avatar
Linus Torvalds committed
748
749
750
	iretq

	.section __ex_table,"a"
751
	.quad native_iret, bad_iret
Linus Torvalds's avatar
Linus Torvalds committed
752
	.previous
Ingo Molnar's avatar
Ingo Molnar committed
753
754
#endif

Linus Torvalds's avatar
Linus Torvalds committed
755
756
	.section .fixup,"ax"
bad_iret:
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

772
773
	.previous

774
	/* edi: workmask, edx: work */
Linus Torvalds's avatar
Linus Torvalds committed
775
retint_careful:
776
	CFI_RESTORE_STATE
Linus Torvalds's avatar
Linus Torvalds committed
777
778
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
779
	TRACE_IRQS_ON
780
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
781
	pushq %rdi
782
	CFI_ADJUST_CFA_OFFSET	8
Linus Torvalds's avatar
Linus Torvalds committed
783
784
	call  schedule
	popq %rdi		
785
	CFI_ADJUST_CFA_OFFSET	-8
Linus Torvalds's avatar
Linus Torvalds committed
786
	GET_THREAD_INFO(%rcx)
787
	DISABLE_INTERRUPTS(CLBR_NONE)
788
	TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
789
790
791
	jmp retint_check
	
retint_signal:
792
	testl $_TIF_DO_NOTIFY_MASK,%edx
793
	jz    retint_swapgs
794
	TRACE_IRQS_ON
795
	ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds's avatar
Linus Torvalds committed
796
797
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp) 			
798
	xorl %esi,%esi		# oldset
Linus Torvalds's avatar
Linus Torvalds committed
799
800
801
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
802
	DISABLE_INTERRUPTS(CLBR_NONE)
803
	TRACE_IRQS_OFF
804
	GET_THREAD_INFO(%rcx)
Roland McGrath's avatar
Roland McGrath committed
805
	jmp retint_with_reschedule
Linus Torvalds's avatar
Linus Torvalds committed
806
807
808
809

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
810
ENTRY(retint_kernel)
Glauber Costa's avatar
Glauber Costa committed
811
	cmpl $0,TI_preempt_count(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
812
	jnz  retint_restore_args
Glauber Costa's avatar
Glauber Costa committed
813
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
Linus Torvalds's avatar
Linus Torvalds committed
814
815
816
817
818
819
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif	
820

Linus Torvalds's avatar
Linus Torvalds committed
821
	CFI_ENDPROC
822
END(common_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
823
824
825
826
827
	
/*
 * APIC interrupts.
 */		
	.macro apicinterrupt num,func
828
	INTR_FRAME
829
	pushq $~(\num)
830
	CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds's avatar
Linus Torvalds committed
831
832
833
834
835
836
837
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
838
END(thermal_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
839

840
841
ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
842
END(threshold_interrupt)
843

Linus Torvalds's avatar
Linus Torvalds committed
844
845
846
#ifdef CONFIG_SMP	
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
847
END(reschedule_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
848

849
850
851
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt	
852
END(invalidate_interrupt\num)
853
854
855
856
857
858
859
860
861
862
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7
Linus Torvalds's avatar
Linus Torvalds committed
863
864
865

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
866
END(call_function_interrupt)
867
868
869
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
870
871
872
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
873
874
875
876
#endif

/* Entry stub for LOCAL_TIMER_VECTOR; dispatches to smp_apic_timer_interrupt. */
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
878

879
880
881
882
/*
 * Entry stub dispatching to uv_bau_message_interrupt.
 *
 * NOTE(review): the vector is the bare literal 220, unlike the other stubs
 * here which use named *_VECTOR constants - confirm it matches the vector
 * the handler is actually installed on.
 */
ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

Linus Torvalds's avatar
Linus Torvalds committed
883
884
/* Entry stub for ERROR_APIC_VECTOR; dispatches to smp_error_interrupt. */
ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
886
887
888

/* Entry stub for SPURIOUS_APIC_VECTOR; dispatches to smp_spurious_interrupt. */
ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
Linus Torvalds's avatar
Linus Torvalds committed
890
891
892
893
894
				
/*
 * Exception entry points.
 *
 * zeroentry sym - entry stub body that pushes a zero into the
 * error code/oldrax slot itself, saves %rax below it (in what will become
 * the rdi slot), loads the address of handler "sym" into %rax and jumps to
 * error_entry.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax	/* handler address, as error_entry expects */
	jmp error_entry
	CFI_ENDPROC
	.endm

	/*
	 * errorentry sym - like zeroentry, but nothing is pushed for the
	 * error code slot: only %rax is saved (into what will become the rdi
	 * slot) before the address of handler "sym" is loaded into %rax and
	 * control passes to error_entry.
	 */
	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax	/* handler address, as error_entry expects */
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	/*
	 * paranoidentry sym, ist=0, irqtrace=1
	 *   sym:      handler to call, with rdi = %rsp (the saved register
	 *             frame) and rsi = the value taken from the ORIG_RAX slot.
	 *   ist:      when non-zero, IST slot number "ist" of per_cpu__init_tss
	 *             (addressed through the offset read from
	 *             %gs:pda_data_offset) is lowered by EXCEPTION_STKSZ for
	 *             the duration of the call and raised again afterwards.
	 *   irqtrace: when non-zero, TRACE_IRQS_OFF is emitted after the
	 *             handler returns.
	 *
	 * After the macro ebx holds the "no swapgs" flag consumed by
	 * paranoidexit: 1 if GS was left untouched, 0 if SWAPGS was executed
	 * here.  ORIG_RAX in the frame has been overwritten with -1.
	 */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx			/* assume no swapgs is needed */
	movl  $MSR_GS_BASE,%ecx
	rdmsr				/* edx:eax = current GS base */
	testl %edx,%edx
	js    1f			/* sign bit set: skip SWAPGS, ebx stays 1
					   (presumably kernel GS already active) */
	SWAPGS
	xorl  %ebx,%ebx			/* swapped here: exit path must swap back */
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 *
	 * The value of "trace" is appended to every label, so the macro can
	 * be instantiated once per value.  Flow:
	 *   - ebx != 0: restore registers and return without SWAPGS.
	 *   - ebx == 0 and saved CS has no RPL bits set: SWAPGS, then restore.
	 *   - ebx == 0 and saved CS has RPL bits set: loop over pending
	 *     _TIF_WORK_MASK work (schedule / do_notify_resume) on the stack
	 *     returned by sync_regs, then SWAPGS and restore.
	 * The macro only closes a CFI region (CFI_ENDPROC); the matching
	 * start must be supplied at the point of use.
	 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx				/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)			/* returning to user mode? */
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace		/* no pending work: leave */
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi 			/* arg2: oldset */
	movq %rsp,%rdi 			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace	/* re-read the flags and re-check */
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace	/* re-read the flags and re-check */
	CFI_ENDPROC
	.endm

Linus Torvalds's avatar
Linus Torvalds committed
1014
1015
1016
1017
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 *
 * Stores the remaining registers into the frame, executes SWAPGS when the
 * saved CS has RPL bits set or when the saved RIP is one of the kernel
 * sites checked at error_kernelspace, then calls the handler with
 * rdi = %rsp (the register frame) and rsi = error code.  ORIG_RAX in the
 * frame is overwritten with -1 before the call.
 *
 * ebx is the "no swapgs" flag carried into error_exit
 * (1: don't need swapgs, 0: need it).
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx		/* default: swapgs will be needed on exit */
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	movq %rdi,RDI(%rsp)	/* rdi is saved last; its slot held rax until now */
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax		/* keep the flag in eax across RESTORE_REST */
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl  TI_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after
	 * iret run with kernel gs again, so don't set the user space flag.
	 * B stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode. Check for these here too.
	 */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)	/* truncated-RIP variant of irq_return */
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp  error_sti
KPROBE_END(error_entry)
Linus Torvalds's avatar
Linus Torvalds committed
1101
1102
1103
	
       /*
        * Reload gs selector with exception handling.
        * edi:  new selector
        *
        * The flags are saved with pushf and interrupts disabled around the
        * two SWAPGS instructions; popf restores the caller's flags before
        * returning.  The selector is still live in %edi when interrupts are
        * disabled, so RDI is excluded from the clobber mask handed to
        * DISABLE_INTERRUPTS.
        *
        * gs_change and the local label "2:" are referenced from outside this
        * function (exception table / fixup code and error_entry), so they
        * must keep their names and positions.
        */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs		/* may fault on a bad selector */
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)
Linus Torvalds's avatar
Linus Torvalds committed
1119
1120
1121
1122
1123
1124
1125
1126
       
        /*
         * Exception-table entry pairing gs_change with bad_gs, followed by
         * the fixup code itself.  bad_gs loads the null selector into %gs
         * and then rejoins native_load_gs_index at its "2:" label.
         */
        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs		/* load the null selector */
	jmp  2b			/* back to the "2:" label after gs_change */
        .previous
	
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 *
 * Builds a fake stack frame whose return address is child_rip, saves all
 * registers into it and calls do_fork with that frame.  do_fork's return
 * value is stored in the frame's RAX slot before the registers are
 * restored.
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	/*
	 * NOTE(review): RESTORE_ALL follows; if it reloads %rdi from the
	 * frame, this zeroing has no visible effect - confirm before removing.
	 */
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
Linus Torvalds's avatar
Linus Torvalds committed
1173
1174
	
/*
 * child_rip - address placed in the fake stack frame by kernel_thread.
 * Calls fn(arg) and passes its 32-bit return value to do_exit.
 */
child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax		# rax = fn
	movq %rsi, %rdi		# first argument = arg
	call *%rax
	# exit
	mov %eax, %edi		# fn's return value becomes do_exit's argument
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
Linus Torvalds's avatar
Linus Torvalds committed
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
1200
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
1201
1202
 *
 * do_sys_execve asm fallback arguments:
1203
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
Linus Torvalds's avatar
Linus Torvalds committed
1204
 */
1205
ENTRY(kernel_execve)
Linus Torvalds's avatar
Linus Torvalds committed
1206
1207
1208
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL	
1209
	movq %rsp,%rcx
Linus Torvalds's avatar
Linus Torvalds committed
1210
1211
1212
1213
1214
1215
1216
1217
1218
	call sys_execve