1 #include <lwk/linkage.h>
3 #include <arch/ptrace.h>
4 #include <arch/asm-offsets.h>
5 #include <arch/idt_vectors.h>
6 #include <arch/dwarf2.h>
11 * This performs the architecture-specific portion of a context switch.
12 * Normally, this is called in the context of prev and returns in the
13 * context of next. However, new tasks are handled differently. Since
14 * new tasks do not yet have a kernel context (rather, their kernel
15 * stack just has the pt_regs to use for the new task), execution returns
16 * directly to the new task, rather than context_switch().
23 * RAX = prev (same value as on entry)
26 * struct task_struct *arch_context_switch(struct task_struct *prev,
27 * struct task_struct *next);
28 * arch_context_switch() returns prev
30 * NOTE: External interrupts are disabled on entry.
32 ENTRY(arch_context_switch)
/* Per the header comment above: RDI = prev, RSI = next (SysV arg regs);
 * returns with RAX = prev. External interrupts are disabled on entry. */
/* NOTE(review): the pushes of prev's callee-saved registers are elided from
 * this excerpt (original lines 34-41) -- confirm against the full file. */
33 /* Save prev's callee saved registers (others saved by caller) */
42 /* Switch to next's stack */
43 movq %rsp, tsk_arch_rsp(%rdi)
44 movq tsk_arch_rsp(%rsi), %rsp
/* From here on we are executing on next's kernel stack. */
46 /* Call C code to do more stuff (save/restore FPU, update PDA, ...) */
47 call __arch_context_switch
48 /* returns with %rax set to prev */
/* Reload next from the per-CPU PDA; RSI may have been clobbered by the
 * C call above (it is a caller-saved register). */
50 movq %gs:pda_pcurrent, %rsi
52 /* New tasks need to be kick-started */
/* Atomically test-and-clear the NEW_TASK flag; presumably the elided
 * conditional branch (original lines 54-55) takes the kick-start path
 * below when the bit was set -- confirm in full file. */
53 lock btr $_TF_NEW_TASK_BIT, tsk_arch_flags(%rsi)
56 /* Restore next's callee saved registers */
65 /* Return to context_switch(), with new task active */
/* ---- Kick-start path for brand-new tasks: their kernel stack holds only
 * a pt_regs frame, so we build user state and iretq directly into it. ---- */
69 call schedule_new_task_tail /* Finish up schedule(), drop locks, etc. */
70 testl $3, CS(%rsp) /* Sets ZF=1 if returning to kernel-space */
71 je 1f /* If ZF=1, leave kernel PDA in place */
72 swapgs /* Install the user PDA */
73 movq $0, %rax /* Zero all of the segment registers */
/* NOTE(review): the segment-register loads and the '1:' label are elided
 * from this excerpt (original lines 74-78). */
79 movq (%rsp), %r15 /* Unpack the pt_regs struct that */
80 movq 1*8(%rsp), %r14 /* __arch_context_switch() put at the top */
81 movq 2*8(%rsp), %r13 /* of the new task's kernel stack. */
/* NOTE(review): restores of r12..rdi (original lines 82-93) are elided. */
94 add $128, %rsp /* Bump to point to RIP slot in pt_regs */
95 iretq /* Start the new task running */
97 END(arch_context_switch)
101 * This is the entry point for system calls. Upon entry we are still running
102 * with the user-level stack and the x86_64 CPU control unit has stashed the
103 * user-level RIP in RCX and RFLAGS in R11. External interrupts are disabled.
105 * The first thing this function does is generate a partial stack frame
106 * containing all caller-saved registers. After this is done, the system call
107 * number (stored in RAX by user-level) is used to index into the system call
108 * table (sys_call_table) and call the handler function. The handler function
109 * is responsible for saving all callee-saved registers... if it is a C
110 * function, callee-saved registers are saved automatically by the compiler.
112 * Immediately before calling the handler function, the kernel stack looks
115 * RIP = user-space RIP
116 * ORIG_RAX = system call number, passed from user-space
117 * RDI = ARG0, passed from user-space
118 * RSI = ARG1, passed from user-space
119 * RDX = ARG2, passed from user-space
120 * (junk) = normally RCX, but RCX is clobbered by SYSCALL
121 * RAX = system call number, passed from user-space
122 * R8 = ARG4, passed from user-space
123 * R9 = ARG5, passed from user-space
124 * R10 = ARG3, passed from user-space
125 * RSP -> R11 = user-space RFLAGS
127 * And the registers are setup as follows:
132 * RCX = ARG3 (was stored on R10 on entry)
135 * RAX = System call number
137 * NOTE: RCX and R11 are clobbered by system calls. This is due to the SYSCALL
138 * instruction using RCX and R11 to store RIP and RFLAGS before
139 * transferring control to the kernel. User-level will observe different
140 * values of RCX and R11 after SYSCALL than before.
142 * NOTE: External interrupts are disabled on entry.
146 * Enter from user-space
/* NOTE(review): the ENTRY label for this syscall entry point (original
 * line 147) is elided from this excerpt. SYSCALL has stashed user RIP in
 * RCX and user RFLAGS in R11 (see header comment above). */
148 swapgs /* Load GS.base with kernel PDA addr */
149 movq %rsp, %gs:pda_oldrsp /* Backup user-space RSP */
150 movq %gs:pda_kernelstack, %rsp /* Load kernel stack */
153 * Save registers to kernel-stack
/* Builds the partial pt_regs frame described in the header comment:
 * slots 0..10 = R11(RFLAGS), R10, R9, R8, RAX, RCX(junk), RDX, RSI,
 * RDI, ORIG_RAX, RCX(user RIP). */
155 subq $10*8, %rsp /* Make room on the stack */
156 movq %rcx, 10*8(%rsp) /* Save user-space RIP */
157 movq %rax, 9*8(%rsp) /* Save syscall # in ORIG_RAX slot */
158 movq %rdi, 8*8(%rsp) /* Save user-space RDI (ARG0) */
159 movq %rsi, 7*8(%rsp) /* Save user-space RSI (ARG1) */
160 movq %rdx, 6*8(%rsp) /* Save user-space RDX (ARG2) */
161 movq %rcx, 5*8(%rsp) /* RCX is clobbered, save anyways */
162 movq %rax, 4*8(%rsp) /* Save user-space RAX (syscall #) */
163 movq %r8, 3*8(%rsp) /* Save user-space R8 (ARG4) */
164 movq %r9, 2*8(%rsp) /* Save user-space R9 (ARG5) */
165 movq %r10, 1*8(%rsp) /* Save user-space R10 (ARG3) */
166 movq %r11, (%rsp) /* Save user-space RFLAGS */
167 sti /* Enable external interrupts */
170 * Call the system call handler
172 movq %r10, %rcx /* Per x86_64 C ABI, RCX holds ARG3 */
173 cmp $__NR_syscall_max, %rax /* Make sure syscall # is in range */
/* NOTE(review): the out-of-range branch (original line 174, presumably a
 * 'ja' to the syscall_not_implemented path) is elided -- confirm. */
175 call *sys_call_table(,%rax,8) /* Call the system call handler */
178 call syscall_not_implemented /* Print error and return */
180 movq %rax, 4*8(%rsp) /* Save return code in stack frame */
182 /* Reschedule, since we're returning to user space */
186 * Return to user-space
/* Note: the 5*8 (junk RCX) and 9*8 (ORIG_RAX) slots are intentionally
 * not restored; RCX/R11 are architecturally clobbered by SYSCALL/SYSRET. */
188 cli /* Disable external interrupts */
189 movq (%rsp), %r11 /* Restore RFLAGS for SYSRET */
190 movq 1*8(%rsp), %r10 /* Restore user-space R10 (ARG3) */
191 movq 2*8(%rsp), %r9 /* Restore user-space R9 (ARG5) */
192 movq 3*8(%rsp), %r8 /* Restore user-space R8 (ARG4) */
193 movq 4*8(%rsp), %rax /* Return syscall return code */
194 movq 6*8(%rsp), %rdx /* Restore user-space RDX (ARG2) */
195 movq 7*8(%rsp), %rsi /* Restore user-space RSI (ARG1) */
196 movq 8*8(%rsp), %rdi /* Restore user-space RDI (ARG0) */
197 movq 10*8(%rsp), %rcx /* Restore RIP for SYSRET */
198 movq %gs:pda_oldrsp, %rsp /* Restore user-space RSP */
199 swapgs /* Restore user-space GS.base */
200 sysretq /* Return to user-space */
205 * This is a handler for SYSCALL instructions issued from compatibility mode...
206 * we don't support them.
208 ENTRY(asm_syscall_ignore)
/* NOTE(review): the body (original lines 209-210) is elided from this
 * excerpt -- per the comment above it rejects compat-mode SYSCALLs;
 * confirm the actual instructions in the full file. */
211 END(asm_syscall_ignore)
215 * This is the common entry point for all interrupts.
217 * Before calling the C handler function, the kernel stack looks like:
220 * SS (stack segment selector)
221 * RSP (stack pointer)
222 * RFLAGS (flags register)
223 * CS (code segment selector)
224 * RIP (instruction pointer)
225 * ERROR_CODE (0 for interrupts with no error code)
226 * RDI (this was the vector # on entry, we move to %rsi/ARG1)
242 * And the registers are setup as follows:
244 * RDI = ARG0: A fully populated 'struct pt_regs *'
245 * RSI = ARG1: The interrupt vector number
247 * NOTE: External interrupts are disabled on entry.
/* Common interrupt entry. NOTE(review): the asm_interrupt ENTRY label
 * (original line ~249) is elided from this excerpt. On entry the IDT
 * stub has pushed an error code and the vector number (in the RDI slot);
 * see the table comment above. */
250 cld /* Clear direction flag */
253 * Save registers to kernel-stack
/* Completes the pt_regs frame begun by hardware + the IDT stub.
 * Note the careful RSI/RDI shuffle: the stub stored the vector number
 * in the RDI slot, so we save RSI first, then pull the vector out,
 * then overwrite that slot with the real RDI value. */
255 subq $14*8, %rsp /* Make room on the stack */
256 movq %rsi, 13*8(%rsp)
257 movq 14*8(%rsp), %rsi /* ARG1: the interrupt vector number */
258 movq %rdi, 14*8(%rsp)
259 movq %rdx, 12*8(%rsp)
260 movq %rcx, 11*8(%rsp)
261 movq %rax, 10*8(%rsp)
/* NOTE(review): saves of r8..r15 (original lines 262-273) are elided. */
274 * Load kernel GS if we're coming from user-space
276 testl $3, CS(%rsp) /* Sets ZF=1 if coming from kspace */
277 je 1f /* If ZF=1, skip installing the PDA */
278 swapgs /* Install the PDA */
/* NOTE(review): the '1:' label (original lines 279-280) is elided. */
281 * Call C code interrupt handler entry point
283 movq %rsp, %rdi /* ARG0: pointer to 'struct pt_regs' */
284 sti /* Enable external interrupts */
285 call do_interrupt /* Call common C handler */
286 cli /* Disable external interrupts */
289 * If returning to user-space, reschedule and restore user-space GS
291 testl $3, CS(%rsp) /* Sets ZF=1 if returning to kspace */
292 je 2f /* If ZF=1, jump forward to 2: below */
293 sti /* Enable external interrupts */
294 call schedule /* Reschedule */
295 cli /* Disable external interrupts */
296 swapgs /* Restore uspace GS register */
/* NOTE(review): the '2:' label (original lines 297-298) is elided. */
299 * Restore registers and return to interrupted program
/* NOTE(review): restores of r15..r8 (original lines 300-310) and the
 * trailing error-code pop + iretq (original lines 316+) are elided. */
311 movq 10*8(%rsp), %rax
312 movq 11*8(%rsp), %rcx
313 movq 12*8(%rsp), %rdx
314 movq 13*8(%rsp), %rsi
315 movq 14*8(%rsp), %rdi
322 * This table contains an initial entry point function for each IDT vector.
323 * When an interrupt vector fires, the first instruction executed is at
326 * This table scheme is necessary because some x86_64 interrupts push an
327 * error code onto the stack and others do not. Additionally, there is no way
328 * for an interrupt handler to determine the interrupt vector that triggered
329 * it. Therefore, the functions in this table push a dummy error code onto
330 * the stack when necessary, always push the vector number, and then call a
331 * common handler (asm_interrupt).
333 * WARNING: Each function/entry in this table must be <= 16 bytes.
334 * Be very careful when adding instructions.
337 ENTRY(asm_idtvec_table)
/* One 16-byte-max stub per IDT vector (see WARNING above). Vectors that
 * the CPU does NOT supply an error code for get a dummy 0 pushed so the
 * stack layout is uniform for asm_interrupt. */
339 .rept NUM_IDT_ENTRIES
340 .if vector<=7||vector==9||vector==15||vector==16||vector>=18
341 pushq $0 /* push dummy error_code */
/* NOTE(review): the '.endif' (original line 342) is elided from this
 * excerpt. */
343 pushq $vector /* push vector # into RDI slot */
344 jmp asm_interrupt /* call common handler */
346 /* Move on to next entry in table: vector increment, 16-byte alignment
 * padding, and '.endr' (original lines 345-349) are elided -- confirm. */
350 END(asm_idtvec_table)
354 * Reload gs selector with exception handling.
/* NOTE(review): this excerpt shows only fragments of load_gs_index --
 * the ENTRY label, pushf/cli, the 'gs_change' mov to %gs, and popf
 * (original lines 355-367) are elided. The CFI directives bracket a
 * pushf/popf pair per the +8/-8 CFA adjustments. */
360 CFI_ADJUST_CFA_OFFSET 8
365 2: mfence /* workaround */
368 CFI_ADJUST_CFA_OFFSET -8
371 ENDPROC(load_gs_index)
/* Exception table entry: if the mov to %gs at 'gs_change' faults, the
 * fault handler resumes at 'bad_gs' below. */
373 .section __ex_table,"a"
375 .quad gs_change,bad_gs
/* Fixup path, entered from the exception table entry above. */
378 /* running with kernelgs */
380 swapgs /* switch back to user gs */