X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=kitten%2Farch%2Fx86_64%2Fkernel%2Fcpu.c;fp=kitten%2Farch%2Fx86_64%2Fkernel%2Fcpu.c;h=7ffe2c511bf93b1681b2a07eaea3594e81c758e8;hb=66a1a4c7a9edcd7d8bc207aca093d694a6e6b5b2;hp=0000000000000000000000000000000000000000;hpb=f7cf9c19ecb0a589dd45ae0d2c91814bd3c2acc2;p=palacios.git diff --git a/kitten/arch/x86_64/kernel/cpu.c b/kitten/arch/x86_64/kernel/cpu.c new file mode 100644 index 0000000..7ffe2c5 --- /dev/null +++ b/kitten/arch/x86_64/kernel/cpu.c @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Bitmap of CPUs that have been initialized. + */ +static cpumask_t cpu_initialized_map; + +/** + * Memory for STACKFAULT stacks, one for each CPU. + */ +char stackfault_stack[NR_CPUS][PAGE_SIZE] +__attribute__((section(".bss.page_aligned"))); + +/** + * Memory for DOUBLEFAULT stacks, one for each CPU. + */ +char doublefault_stack[NR_CPUS][PAGE_SIZE] +__attribute__((section(".bss.page_aligned"))); + +/** + * Memory for NMI stacks, one for each CPU. + */ +char nmi_stack[NR_CPUS][PAGE_SIZE] +__attribute__((section(".bss.page_aligned"))); + +/** + * Memory for DEBUG stacks, one for each CPU. + */ +char debug_stack[NR_CPUS][PAGE_SIZE] +__attribute__((section(".bss.page_aligned"))); + +/** + * Memory for MCE stacks, one for each CPU. + */ +char mce_stack[NR_CPUS][PAGE_SIZE] +__attribute__((section(".bss.page_aligned"))); + +/** + * Initializes the calling CPU's Per-CPU Data Area (PDA). + * When in kernel mode, each CPU's GS.base is loaded with the address of the + * CPU's PDA. This allows data in the PDA to be accessed using segment relative + * accesses, like: + * + * movl $gs:pcurrent,%rdi // move CPU's current task pointer to rdi + * + * This is similar to thread-local data for user-level programs. + */ +void __init +pda_init(unsigned int cpu, struct task_struct *task) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* + * Point FS and GS at the NULL segment descriptor (entry 0) in the GDT. + * x86_64 does away with a lot of segmentation cruftiness... there's no + * need to set up specific GDT entries for FS or GS. + */ + asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + + /* + * Load the address of this CPU's PDA into this CPU's GS_BASE model + * specific register. Upon entry to the kernel, the SWAPGS instruction + * is used to load the value from MSR_GS_BASE into the GS segment + * register's base address (GS.base). The user-level GS.base value + * is stored in MSR_GS_BASE. When the kernel is exited, SWAPGS is + * called again. + */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->pcurrent = task; + pda->active_aspace = task->aspace; + pda->kernelstack = (unsigned long)task - PDA_STACKOFFSET + TASK_SIZE; + pda->mmu_state = 0; +} + +/** + * Initializes the calling CPU's Control Register 4 (CR4). + * The bootstrap assembly code has already partially setup this register. + * We only touch the bits we care about, leaving the others untouched. + */ +static void __init +cr4_init(void) +{ + clear_in_cr4( + X86_CR4_VME | /* Disable Virtual-8086 support/cruft */ + X86_CR4_PVI | /* Disable support for VIF flag */ + X86_CR4_TSD | /* Allow RDTSC instruction at user-level */ + X86_CR4_DE /* Disable debugging extensions */ + ); +} + +/** + * Initializes and installs the calling CPU's Global Descriptor Table (GDT). + * Each CPU has its own GDT. + */ +static void __init +gdt_init(void) +{ + unsigned int cpu = this_cpu; + + /* The bootstrap CPU's GDT has already been setup */ + if (cpu != 0) + memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); + cpu_gdt_descr[cpu].size = GDT_SIZE; + + /* Install the CPU's GDT */ + asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); + + /* + * Install the CPU's LDT... Local Descriptor Table. + * We have no need for a LDT, so we point it at the NULL descriptor. + */ + asm volatile("lldt %w0":: "r" (0)); +} + +/** + * Installs the calling CPU's Local Descriptor Table (LDT). + * All CPUs share the same IDT. + */ +static void __init +idt_init(void) +{ + /* + * The bootstrap CPU has already filled in the IDT table via the + * interrupts_init() call in setup.c. All we need to do is tell the CPU + * about it. + */ + asm volatile("lidt %0" :: "m" (idt_descr)); +} + +/** + * Initializes and installs the calling CPU's Task State Segment (TSS). + */ +static void __init +tss_init(void) +{ + unsigned int cpu = this_cpu; + struct tss_struct *tss = &per_cpu(tss, cpu); + int i; + + /* + * Initialize the CPU's Interrupt Stack Table. + * Certain exceptions and interrupts are handled with their own, + * known good stack. The IST holds the address of these stacks. + */ + tss->ist[STACKFAULT_STACK-1] = (unsigned long)&stackfault_stack[cpu][0]; + tss->ist[DOUBLEFAULT_STACK-1] = (unsigned long)&doublefault_stack[cpu][0]; + tss->ist[NMI_STACK-1] = (unsigned long)&nmi_stack[cpu][0]; + tss->ist[DEBUG_STACK-1] = (unsigned long)&debug_stack[cpu][0]; + tss->ist[MCE_STACK-1] = (unsigned long)&mce_stack[cpu][0]; + + /* + * Initialize the CPU's I/O permission bitmap. + * The <= is required because the CPU will access up to 8 bits beyond + * the end of the IO permission bitmap. + */ + tss->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + for (i = 0; i <= IO_BITMAP_LONGS; i++) + tss->io_bitmap[i] = ~0UL; + + /* + * Install the CPU's TSS and load the CPU's Task Register (TR). + * Each CPU has its own TSS. + */ + set_tss_desc(cpu, tss); + asm volatile("ltr %w0":: "r" (GDT_ENTRY_TSS*8)); +} + +/** + * Initializes various Model Specific Registers (MSRs) of the calling CPU. + */ +static void __init +msr_init(void) +{ + /* + * Setup the MSRs needed to support the SYSCALL and SYSRET + * instructions. Really, you should read the manual to understand these + * gems. In summary, STAR and LSTAR specify the CS, SS, and RIP to + * install when the SYSCALL instruction is issued. They also specify the + * CS and SS to install on SYSRET. + * + * On SYSCALL, the x86_64 CPU control unit uses STAR to load CS and SS and + * LSTAR to load RIP. The old RIP is saved in RCX. + * + * On SYSRET, the control unit uses STAR to restore CS and SS. + * RIP is loaded from RCX. + * + * SYSCALL_MASK specifies the RFLAGS to clear on SYSCALL. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | /* SYSRET CS+SS */ + ((u64)__KERNEL_CS)<<32); /* SYSCALL CS+SS */ + wrmsrl(MSR_LSTAR, asm_syscall); /* SYSCALL RIP */ + wrmsrl(MSR_CSTAR, asm_syscall_ignore); /* RIP for compat. mode */ + wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); + + /* + * Setup MSRs needed to support the PDA. + * pda_init() initialized MSR_GS_BASE already. When the SWAPGS + * instruction is issued, the x86_64 control unit atomically swaps + * MSR_GS_BASE and MSR_KERNEL_GS_BASE. So, when we call SWAPGS to + * exit the kernel, the value in MSR_KERNEL_GS_BASE will be loaded. + * User-space will see MSR_FS_BASE and MSR_GS_BASE both set to 0. + */ + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); +} + +/** + * Initializes the calling CPU's debug registers. + */ +static void __init +dbg_init(void) +{ + /* + * Clear the CPU's debug registers. + * DR[0-3] are Address-Breakpoint Registers + * DR[4-5] are reserved and should not be used by software + * DR6 is the Debug Status Register + * DR7 is the Debug Control Register + */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +} + +void __init +cpu_init(void) +{ + /* + * Get a reference to the currently executing task and the ID of the + * CPU being initialized. We can't use the normal 'current' mechanism + * since it relies on the PDA being initialized, which it isn't for all + * CPUs other than the boot CPU (id=0). pda_init() is called below. + */ + struct task_struct *me = get_current_via_RSP(); + unsigned int cpu = me->cpu_id; /* logical ID */ + + if (cpu_test_and_set(cpu, cpu_initialized_map)) + panic("CPU#%u already initialized!\n", cpu); + printk(KERN_DEBUG "Initializing CPU#%u\n", cpu); + + pda_init(cpu, me); /* per-cpu data area */ + identify_cpu(); /* determine cpu features via CPUID */ + cr4_init(); /* control register 4 */ + gdt_init(); /* global descriptor table */ + idt_init(); /* interrupt descriptor table */ + tss_init(); /* task state segment */ + msr_init(); /* misc. model specific registers */ + dbg_init(); /* debug registers */ + fpu_init(); /* floating point unit */ + lapic_init(); /* local advanced prog. interrupt controller */ + time_init(); /* detects CPU frequency, udelay(), etc. */ + barrier(); /* compiler memory barrier, avoids reordering */ +}