#include
#include
#include
#include
#include
#include
#include

/**
 * This performs the architecture-specific portion of a context switch.
 * Normally, this is called in the context of prev and returns in the
 * context of next. However, new tasks are handled differently. Since
 * new tasks do not yet have a kernel context (rather, their kernel
 * stack just holds the pt_regs to use for the new task), execution returns
 * directly to the new task, rather than to context_switch().
 *
 * Input Registers:
 *	RDI = prev
 *	RSI = next
 *
 * Output Registers:
 *	RAX = prev (same value as on entry)
 *
 * C Prototype:
 *	struct task_struct *arch_context_switch(struct task_struct *prev,
 *	                                        struct task_struct *next);
 *	arch_context_switch() returns prev
 *
 * NOTE: External interrupts are disabled on entry.
 */
ENTRY(arch_context_switch)
	/* Save prev's callee-saved registers (others saved by caller) */
	pushf
	pushq %rbp
	pushq %rbx
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15

	/* Switch to next's stack */
	movq %rsp, tsk_arch_rsp(%rdi)
	movq tsk_arch_rsp(%rsi), %rsp

	/* Call C code to do more stuff (save/restore FPU, update PDA, ...) */
	call __arch_context_switch	/* returns with %rax set to prev */
	movq %rax, %rdi
	movq %gs:pda_pcurrent, %rsi

	/* New tasks need to be kick-started */
	lock btr $_TF_NEW_TASK_BIT, tsk_arch_flags(%rsi)
	jc kickstart_new_task

	/* Restore next's callee-saved registers */
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbx
	popq %rbp
	popf

	/* Return to context_switch(), with the new task active */
	retq

kickstart_new_task:
	call schedule_new_task_tail	/* Finish up schedule(), drop locks, etc. */

	testl $3, CS(%rsp)	/* Sets ZF=1 if returning to kernel-space */
	je 1f			/* If ZF=1, leave kernel PDA in place */
	swapgs			/* Install the user PDA */
	movq $0, %rax		/* Zero all of the segment registers */
	movl %eax, %ds
	movl %eax, %es
	movl %eax, %fs
	movl %eax, %gs
1:
	movq (%rsp), %r15	/* Unpack the pt_regs struct that */
	movq 1*8(%rsp), %r14	/* __arch_context_switch() put at the top */
	movq 2*8(%rsp), %r13	/* of the new task's kernel stack. */
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %rbp
	movq 5*8(%rsp), %rbx
	movq 6*8(%rsp), %r11
	movq 7*8(%rsp), %r10
	movq 8*8(%rsp), %r9
	movq 9*8(%rsp), %r8
	movq 10*8(%rsp), %rax
	movq 11*8(%rsp), %rcx
	movq 12*8(%rsp), %rdx
	movq 13*8(%rsp), %rsi
	movq 14*8(%rsp), %rdi
	addq $16*8, %rsp	/* Skip 15 GPR slots + error code; RSP now points at RIP */
	iretq			/* Start the new task running */
END(arch_context_switch)
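/*
 * For reference, a minimal C-side sketch of how a scheduler might invoke
 * arch_context_switch(). The caller below is hypothetical -- context_switch()
 * and __arch_context_switch() are named in the comments above, but this body
 * is an illustrative assumption, not the actual implementation:
 *
 *	static struct task_struct *
 *	context_switch(struct task_struct *prev, struct task_struct *next)
 *	{
 *		// Stacks are swapped inside arch_context_switch(); when it
 *		// returns we are already running on next's kernel stack, in
 *		// next's context. New tasks never return here -- they iretq
 *		// directly to the pt_regs at the top of their kernel stack.
 *		prev = arch_context_switch(prev, next);
 *		return prev;	// returned so prev can be cleaned up
 *	}
 */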
/**
 * This is the entry point for system calls. Upon entry we are still running
 * on the user-level stack, and the SYSCALL instruction has stashed the
 * user-level RIP in RCX and RFLAGS in R11. External interrupts are disabled.
 *
 * The first thing this function does is build a partial stack frame
 * containing all caller-saved registers. After this is done, the system call
 * number (stored in RAX by user-level) is used to index into the system call
 * table (sys_call_table) and call the handler function. The handler function
 * is responsible for saving all callee-saved registers... if it is a C
 * function, callee-saved registers are saved automatically by the compiler.
 *
 * Immediately before calling the handler function, the kernel stack looks
 * like:
 *
 *	          RIP      = user-space RIP
 *	          ORIG_RAX = system call number, passed from user-space
 *	          RDI      = ARG0, passed from user-space
 *	          RSI      = ARG1, passed from user-space
 *	          RDX      = ARG2, passed from user-space
 *	          (junk)   = normally RCX, but RCX is clobbered by SYSCALL
 *	          RAX      = system call number, passed from user-space
 *	          R8       = ARG4, passed from user-space
 *	          R9       = ARG5, passed from user-space
 *	          R10      = ARG3, passed from user-space
 *	RSP ->    R11      = user-space RFLAGS
 *
 * And the registers are set up as follows:
 *
 *	RDI = ARG0
 *	RSI = ARG1
 *	RDX = ARG2
 *	RCX = ARG3 (was in R10 on entry)
 *	R8  = ARG4
 *	R9  = ARG5
 *	RAX = system call number
 *
 * NOTE: RCX and R11 are clobbered by system calls. This is due to the SYSCALL
 *       instruction using RCX and R11 to store RIP and RFLAGS before
 *       transferring control to the kernel. User-level will observe different
 *       values of RCX and R11 after SYSCALL than before.
 *
 * NOTE: External interrupts are disabled on entry.
 */
ENTRY(asm_syscall)
	/*
	 * Enter from user-space
	 */
	swapgs				/* Load GS.base with kernel PDA addr */
	movq %rsp, %gs:pda_oldrsp	/* Back up user-space RSP */
	movq %gs:pda_kernelstack, %rsp	/* Load kernel stack */

	/*
	 * Save registers to the kernel stack
	 */
	subq $10*8, %rsp	/* Make room on the stack */
	movq %rcx, 10*8(%rsp)	/* Save user-space RIP */
	movq %rax, 9*8(%rsp)	/* Save syscall # in ORIG_RAX slot */
	movq %rdi, 8*8(%rsp)	/* Save user-space RDI (ARG0) */
	movq %rsi, 7*8(%rsp)	/* Save user-space RSI (ARG1) */
	movq %rdx, 6*8(%rsp)	/* Save user-space RDX (ARG2) */
	movq %rcx, 5*8(%rsp)	/* RCX is clobbered, save anyway */
	movq %rax, 4*8(%rsp)	/* Save user-space RAX (syscall #) */
	movq %r8, 3*8(%rsp)	/* Save user-space R8 (ARG4) */
	movq %r9, 2*8(%rsp)	/* Save user-space R9 (ARG5) */
	movq %r10, 1*8(%rsp)	/* Save user-space R10 (ARG3) */
	movq %r11, (%rsp)	/* Save user-space RFLAGS */
	sti			/* Enable external interrupts */

	/*
	 * Call the system call handler
	 */
	movq %r10, %rcx			/* Per the x86_64 C ABI, RCX holds ARG3 */
	cmpq $__NR_syscall_max, %rax	/* Make sure syscall # is in range */
	ja 1f				/* Unsigned compare also rejects "negative" #s */
	call *sys_call_table(,%rax,8)	/* Call the system call handler */
	jmp 2f
1:	call syscall_not_implemented	/* Print error and return */
2:	movq %rax, 4*8(%rsp)		/* Save return code in stack frame */

	/* Reschedule, since we're returning to user-space */
	call schedule

	/*
	 * Return to user-space
	 */
	cli			/* Disable external interrupts */
	movq (%rsp), %r11	/* Restore RFLAGS for SYSRET */
	movq 1*8(%rsp), %r10	/* Restore user-space R10 (ARG3) */
	movq 2*8(%rsp), %r9	/* Restore user-space R9 (ARG5) */
	movq 3*8(%rsp), %r8	/* Restore user-space R8 (ARG4) */
	movq 4*8(%rsp), %rax	/* Return syscall return code */
	movq 6*8(%rsp), %rdx	/* Restore user-space RDX (ARG2) */
	movq 7*8(%rsp), %rsi	/* Restore user-space RSI (ARG1) */
	movq 8*8(%rsp), %rdi	/* Restore user-space RDI (ARG0) */
	movq 10*8(%rsp), %rcx	/* Restore RIP for SYSRET */
	movq %gs:pda_oldrsp, %rsp	/* Restore user-space RSP */
	swapgs			/* Restore user-space GS.base */
	sysretq			/* Return to user-space */
END(asm_syscall)

/**
 * This is a handler for SYSCALL instructions issued from compatibility mode...
 * we don't support them.
 */
ENTRY(asm_syscall_ignore)
	mov $-ENOSYS, %eax
	sysret
END(asm_syscall_ignore)
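/*
 * For reference, a C-level sketch of the dispatch that asm_syscall performs
 * with "call *sys_call_table(,%rax,8)". The entry type and the dispatch
 * wrapper below are assumptions for illustration; the real table type is
 * defined wherever sys_call_table is declared:
 *
 *	typedef long (*syscall_fn_t)(long, long, long, long, long, long);
 *	extern syscall_fn_t sys_call_table[__NR_syscall_max + 1];
 *
 *	long dispatch(unsigned long nr, long a0, long a1, long a2,
 *	              long a3, long a4, long a5)
 *	{
 *		// Unsigned compare, like the 'ja' above: a "negative"
 *		// syscall number wraps to a huge value and is rejected.
 *		if (nr > __NR_syscall_max)
 *			return syscall_not_implemented();
 *		return sys_call_table[nr](a0, a1, a2, a3, a4, a5);
 *	}
 */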
/**
 * This is the common entry point for all interrupts.
 *
 * Before calling the C handler function, the kernel stack looks like:
 *
 *	          [...]
 *	          SS          (stack segment selector)
 *	          RSP         (stack pointer)
 *	          RFLAGS      (flags register)
 *	          CS          (code segment selector)
 *	          RIP         (instruction pointer)
 *	          ERROR_CODE  (0 for interrupts with no error code)
 *	          RDI         (this held the vector # on entry; moved to %rsi/ARG1)
 *	          RSI
 *	          RDX
 *	          RCX
 *	          RAX
 *	          R8
 *	          R9
 *	          R10
 *	          R11
 *	          RBX
 *	          RBP
 *	          R12
 *	          R13
 *	          R14
 *	RSP ->    R15
 *
 * And the registers are set up as follows:
 *
 *	RDI = ARG0: a fully populated 'struct pt_regs *'
 *	RSI = ARG1: the interrupt vector number
 *
 * NOTE: External interrupts are disabled on entry.
 */
ENTRY(asm_interrupt)
	cld			/* Clear direction flag */

	/*
	 * Save registers to the kernel stack
	 */
	subq $14*8, %rsp	/* Make room on the stack */
	movq %rsi, 13*8(%rsp)
	movq 14*8(%rsp), %rsi	/* ARG1: the interrupt vector number */
	movq %rdi, 14*8(%rsp)
	movq %rdx, 12*8(%rsp)
	movq %rcx, 11*8(%rsp)
	movq %rax, 10*8(%rsp)
	movq %r8, 9*8(%rsp)
	movq %r9, 8*8(%rsp)
	movq %r10, 7*8(%rsp)
	movq %r11, 6*8(%rsp)
	movq %rbx, 5*8(%rsp)
	movq %rbp, 4*8(%rsp)
	movq %r12, 3*8(%rsp)
	movq %r13, 2*8(%rsp)
	movq %r14, 1*8(%rsp)
	movq %r15, (%rsp)

	/*
	 * Load kernel GS if we're coming from user-space
	 */
	testl $3, CS(%rsp)	/* Sets ZF=1 if coming from kernel-space */
	je 1f			/* If ZF=1, skip installing the PDA */
	swapgs			/* Install the PDA */
1:
	/*
	 * Call C code interrupt handler entry point
	 */
	movq %rsp, %rdi		/* ARG0: pointer to 'struct pt_regs' */
	sti			/* Enable external interrupts */
	call do_interrupt	/* Call common C handler */
	cli			/* Disable external interrupts */

	/*
	 * If returning to user-space, reschedule and restore user-space GS
	 */
	testl $3, CS(%rsp)	/* Sets ZF=1 if returning to kernel-space */
	je 2f			/* If ZF=1, jump forward to 2: below */
	sti			/* Enable external interrupts */
	call schedule		/* Reschedule */
	cli			/* Disable external interrupts */
	swapgs			/* Restore user-space GS register */
2:
	/*
	 * Restore registers and return to the interrupted program
	 */
	movq (%rsp), %r15
	movq 1*8(%rsp), %r14
	movq 2*8(%rsp), %r13
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %rbp
	movq 5*8(%rsp), %rbx
	movq 6*8(%rsp), %r11
	movq 7*8(%rsp), %r10
	movq 8*8(%rsp), %r9
	movq 9*8(%rsp), %r8
	movq 10*8(%rsp), %rax
	movq 11*8(%rsp), %rcx
	movq 12*8(%rsp), %rdx
	movq 13*8(%rsp), %rsi
	movq 14*8(%rsp), %rdi
	addq $16*8, %rsp	/* Skip 15 GPR slots + error code */
	iretq
END(asm_interrupt)

/**
 * This table contains an initial entry point function for each IDT vector.
 * When an interrupt vector fires, the first instruction executed is at
 * table[vector].
 *
 * This table scheme is necessary because some x86_64 interrupts push an
 * error code onto the stack and others do not. Additionally, there is no way
 * for an interrupt handler to determine the interrupt vector that triggered
 * it. Therefore, the functions in this table push a dummy error code onto
 * the stack when necessary, always push the vector number, and then jump to
 * a common handler (asm_interrupt).
 *
 * WARNING: Each function/entry in this table must be <= 16 bytes.
 *          Be very careful when adding instructions.
 */
.align 16
ENTRY(asm_idtvec_table)
vector=0
.rept NUM_IDT_ENTRIES
	.if vector<=7||vector==9||vector==15||vector==16||vector>=18
	pushq $0		/* push dummy error_code */
	.endif
	pushq $vector		/* push vector # into RDI slot */
	jmp asm_interrupt	/* jump to common handler */

	/* Move on to the next entry in the table */
	.align 16
	vector=vector+1
.endr
END(asm_idtvec_table)
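/*
 * For reference, a C view of the frame that asm_interrupt hands to
 * do_interrupt(). This struct is a sketch reconstructed from the stack
 * layout documented above -- the real 'struct pt_regs' lives in the arch
 * headers and its field names may differ:
 *
 *	struct pt_regs {
 *		unsigned long r15, r14, r13, r12, rbp, rbx;	// lowest address (RSP)
 *		unsigned long r11, r10, r9, r8;
 *		unsigned long rax, rcx, rdx, rsi, rdi;
 *		unsigned long error_code;	// 0 for vectors with none
 *		unsigned long rip, cs, rflags, rsp, ss;	// pushed by the CPU
 *	};
 *
 *	void do_interrupt(struct pt_regs *regs, unsigned long vector);
 *
 * The 'testl $3, CS(%rsp)' checks read the saved CS selector: its low two
 * bits hold the privilege level (0 = kernel, 3 = user), so ZF=1 means the
 * interrupt arrived from kernel-space.
 */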
/**
 * Reload the GS selector, with exception handling.
 *
 *	EDI = new selector
 */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi, %gs
2:	mfence			/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table, "a"
	.align 8
	.quad gs_change, bad_gs
	.previous

	.section .fixup, "ax"
	/* running with kernel GS */
bad_gs:
	swapgs			/* switch back to user GS */
	xorl %eax, %eax
	movl %eax, %gs
	jmp 2b
	.previous
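/*
 * For reference, a sketch of how the __ex_table entry above behaves. If the
 * 'movl %edi, %gs' at gs_change: faults (e.g., a bad selector), the fault
 * handler searches the exception table for the faulting RIP and, on a match,
 * resumes execution at the fixup address instead of panicking. In rough C
 * pseudocode (names are illustrative, not the real implementation):
 *
 *	struct ex_table_entry { unsigned long insn, fixup; };
 *
 *	// In the fault handler:
 *	unsigned long fixup = search_exception_table(regs->rip);
 *	if (fixup)
 *		regs->rip = fixup;	// e.g., gs_change -> bad_gs
 *
 * Per the comments above, the bad_gs fixup begins running with the kernel GS
 * installed, so it swaps back to the user GS and loads a null selector
 * before resuming at 2:.
 */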