#include #include #include #include #include #include #include #include #include #include #include #include #include /** * Bitmap of CPUs that have been initialized. */ static cpumask_t cpu_initialized_map; /** * Memory for STACKFAULT stacks, one for each CPU. */ char stackfault_stack[NR_CPUS][PAGE_SIZE] __attribute__((section(".bss.page_aligned"))); /** * Memory for DOUBLEFAULT stacks, one for each CPU. */ char doublefault_stack[NR_CPUS][PAGE_SIZE] __attribute__((section(".bss.page_aligned"))); /** * Memory for NMI stacks, one for each CPU. */ char nmi_stack[NR_CPUS][PAGE_SIZE] __attribute__((section(".bss.page_aligned"))); /** * Memory for DEBUG stacks, one for each CPU. */ char debug_stack[NR_CPUS][PAGE_SIZE] __attribute__((section(".bss.page_aligned"))); /** * Memory for MCE stacks, one for each CPU. */ char mce_stack[NR_CPUS][PAGE_SIZE] __attribute__((section(".bss.page_aligned"))); /** * Initializes the calling CPU's Per-CPU Data Area (PDA). * When in kernel mode, each CPU's GS.base is loaded with the address of the * CPU's PDA. This allows data in the PDA to be accessed using segment relative * accesses, like: * * movl $gs:pcurrent,%rdi // move CPU's current task pointer to rdi * * This is similar to thread-local data for user-level programs. */ void __init pda_init(unsigned int cpu, struct task_struct *task) { struct x8664_pda *pda = cpu_pda(cpu); /* * Point FS and GS at the NULL segment descriptor (entry 0) in the GDT. * x86_64 does away with a lot of segmentation cruftiness... there's no * need to set up specific GDT entries for FS or GS. */ asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); /* * Load the address of this CPU's PDA into this CPU's GS_BASE model * specific register. Upon entry to the kernel, the SWAPGS instruction * is used to load the value from MSR_GS_BASE into the GS segment * register's base address (GS.base). The user-level GS.base value * is stored in MSR_GS_BASE. When the kernel is exited, SWAPGS is * called again. */ mb(); wrmsrl(MSR_GS_BASE, pda); mb(); pda->cpunumber = cpu; pda->pcurrent = task; pda->active_aspace = task->aspace; pda->kernelstack = (unsigned long)task - PDA_STACKOFFSET + TASK_SIZE; pda->mmu_state = 0; } /** * Initializes the calling CPU's Control Register 4 (CR4). * The bootstrap assembly code has already partially setup this register. * We only touch the bits we care about, leaving the others untouched. */ static void __init cr4_init(void) { clear_in_cr4( X86_CR4_VME | /* Disable Virtual-8086 support/cruft */ X86_CR4_PVI | /* Disable support for VIF flag */ X86_CR4_TSD | /* Allow RDTSC instruction at user-level */ X86_CR4_DE /* Disable debugging extensions */ ); } /** * Initializes and installs the calling CPU's Global Descriptor Table (GDT). * Each CPU has its own GDT. */ static void __init gdt_init(void) { unsigned int cpu = this_cpu; /* The bootstrap CPU's GDT has already been setup */ if (cpu != 0) memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; /* Install the CPU's GDT */ asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); /* * Install the CPU's LDT... Local Descriptor Table. * We have no need for a LDT, so we point it at the NULL descriptor. */ asm volatile("lldt %w0":: "r" (0)); } /** * Installs the calling CPU's Local Descriptor Table (LDT). * All CPUs share the same IDT. */ static void __init idt_init(void) { /* * The bootstrap CPU has already filled in the IDT table via the * interrupts_init() call in setup.c. All we need to do is tell the CPU * about it. */ asm volatile("lidt %0" :: "m" (idt_descr)); } /** * Initializes and installs the calling CPU's Task State Segment (TSS). */ static void __init tss_init(void) { unsigned int cpu = this_cpu; struct tss_struct *tss = &per_cpu(tss, cpu); int i; /* * Initialize the CPU's Interrupt Stack Table. * Certain exceptions and interrupts are handled with their own, * known good stack. The IST holds the address of these stacks. */ tss->ist[STACKFAULT_STACK-1] = (unsigned long)&stackfault_stack[cpu][0]; tss->ist[DOUBLEFAULT_STACK-1] = (unsigned long)&doublefault_stack[cpu][0]; tss->ist[NMI_STACK-1] = (unsigned long)&nmi_stack[cpu][0]; tss->ist[DEBUG_STACK-1] = (unsigned long)&debug_stack[cpu][0]; tss->ist[MCE_STACK-1] = (unsigned long)&mce_stack[cpu][0]; /* * Initialize the CPU's I/O permission bitmap. * The <= is required because the CPU will access up to 8 bits beyond * the end of the IO permission bitmap. */ tss->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); for (i = 0; i <= IO_BITMAP_LONGS; i++) tss->io_bitmap[i] = ~0UL; /* * Install the CPU's TSS and load the CPU's Task Register (TR). * Each CPU has its own TSS. */ set_tss_desc(cpu, tss); asm volatile("ltr %w0":: "r" (GDT_ENTRY_TSS*8)); } /** * Initializes various Model Specific Registers (MSRs) of the calling CPU. */ static void __init msr_init(void) { /* * Setup the MSRs needed to support the SYSCALL and SYSRET * instructions. Really, you should read the manual to understand these * gems. In summary, STAR and LSTAR specify the CS, SS, and RIP to * install when the SYSCALL instruction is issued. They also specify the * CS and SS to install on SYSRET. * * On SYSCALL, the x86_64 CPU control unit uses STAR to load CS and SS and * LSTAR to load RIP. The old RIP is saved in RCX. * * On SYSRET, the control unit uses STAR to restore CS and SS. * RIP is loaded from RCX. * * SYSCALL_MASK specifies the RFLAGS to clear on SYSCALL. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | /* SYSRET CS+SS */ ((u64)__KERNEL_CS)<<32); /* SYSCALL CS+SS */ wrmsrl(MSR_LSTAR, asm_syscall); /* SYSCALL RIP */ wrmsrl(MSR_CSTAR, asm_syscall_ignore); /* RIP for compat. mode */ wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); /* * Setup MSRs needed to support the PDA. * pda_init() initialized MSR_GS_BASE already. When the SWAPGS * instruction is issued, the x86_64 control unit atomically swaps * MSR_GS_BASE and MSR_KERNEL_GS_BASE. So, when we call SWAPGS to * exit the kernel, the value in MSR_KERNEL_GS_BASE will be loaded. * User-space will see MSR_FS_BASE and MSR_GS_BASE both set to 0. */ wrmsrl(MSR_FS_BASE, 0); wrmsrl(MSR_KERNEL_GS_BASE, 0); } /** * Initializes the calling CPU's debug registers. */ static void __init dbg_init(void) { /* * Clear the CPU's debug registers. * DR[0-3] are Address-Breakpoint Registers * DR[4-5] are reserved and should not be used by software * DR6 is the Debug Status Register * DR7 is the Debug Control Register */ set_debugreg(0UL, 0); set_debugreg(0UL, 1); set_debugreg(0UL, 2); set_debugreg(0UL, 3); set_debugreg(0UL, 6); set_debugreg(0UL, 7); } void __init cpu_init(void) { /* * Get a reference to the currently executing task and the ID of the * CPU being initialized. We can't use the normal 'current' mechanism * since it relies on the PDA being initialized, which it isn't for all * CPUs other than the boot CPU (id=0). pda_init() is called below. */ struct task_struct *me = get_current_via_RSP(); unsigned int cpu = me->cpu_id; /* logical ID */ if (cpu_test_and_set(cpu, cpu_initialized_map)) panic("CPU#%u already initialized!\n", cpu); printk(KERN_DEBUG "Initializing CPU#%u\n", cpu); pda_init(cpu, me); /* per-cpu data area */ identify_cpu(); /* determine cpu features via CPUID */ cr4_init(); /* control register 4 */ gdt_init(); /* global descriptor table */ idt_init(); /* interrupt descriptor table */ tss_init(); /* task state segment */ msr_init(); /* misc. model specific registers */ dbg_init(); /* debug registers */ fpu_init(); /* floating point unit */ lapic_init(); /* local advanced prog. interrupt controller */ time_init(); /* detects CPU frequency, udelay(), etc. */ barrier(); /* compiler memory barrier, avoids reordering */ }