1 #include <lwk/kernel.h>
5 #include <lwk/ptrace.h>
6 #include <lwk/string.h>
8 #include <arch/processor.h>
10 #include <arch/proto.h>
11 #include <arch/i387.h>
12 #include <arch/apic.h>
16 * Bitmap of CPUs that have been initialized.
/* Set atomically via cpu_test_and_set() in cpu_init() so that a CPU can
 * detect (and panic on) being initialized twice. */
18 static cpumask_t cpu_initialized_map;
/* Each array below provides one page-sized, known-good exception stack per
 * CPU. tss_init() installs the address of each CPU's stack into that CPU's
 * TSS Interrupt Stack Table (IST). The ".bss.page_aligned" section keeps
 * the arrays page-aligned. */
21 * Memory for STACKFAULT stacks, one for each CPU.
23 char stackfault_stack[NR_CPUS][PAGE_SIZE]
24 __attribute__((section(".bss.page_aligned")));
27 * Memory for DOUBLEFAULT stacks, one for each CPU.
29 char doublefault_stack[NR_CPUS][PAGE_SIZE]
30 __attribute__((section(".bss.page_aligned")));
33 * Memory for NMI stacks, one for each CPU.
35 char nmi_stack[NR_CPUS][PAGE_SIZE]
36 __attribute__((section(".bss.page_aligned")));
39 * Memory for DEBUG stacks, one for each CPU.
41 char debug_stack[NR_CPUS][PAGE_SIZE]
42 __attribute__((section(".bss.page_aligned")));
45 * Memory for MCE stacks, one for each CPU.
47 char mce_stack[NR_CPUS][PAGE_SIZE]
48 __attribute__((section(".bss.page_aligned")));
51 * Initializes the calling CPU's Per-CPU Data Area (PDA).
52 * When in kernel mode, each CPU's GS.base is loaded with the address of the
53 * CPU's PDA. This allows data in the PDA to be accessed using segment relative
56 * movq %gs:pcurrent,%rdi // move CPU's current task pointer to rdi
58 * This is similar to thread-local data for user-level programs.
61 pda_init(unsigned int cpu, struct task_struct *task)
63 struct x8664_pda *pda = cpu_pda(cpu);
66 * Point FS and GS at the NULL segment descriptor (entry 0) in the GDT.
67 * x86_64 does away with a lot of segmentation cruftiness... there's no
68 * need to set up specific GDT entries for FS or GS.
70 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
73 * Load the address of this CPU's PDA into this CPU's GS_BASE model
74 * specific register. Upon entry to the kernel, the SWAPGS instruction
75 * is used to load the value from MSR_GS_BASE into the GS segment
76 * register's base address (GS.base). The user-level GS.base value
77 * is stored in MSR_GS_BASE. When the kernel is exited, SWAPGS is
/* NOTE(review): architecturally, SWAPGS exchanges GS.base with
 * MSR_KERNEL_GS_BASE (initialized to 0 in msr_init()); the user-level
 * GS.base is held in MSR_KERNEL_GS_BASE while the kernel runs. */
81 wrmsrl(MSR_GS_BASE, pda);
/* Seed this CPU's PDA with the bootstrap task's state. */
86 pda->active_aspace = task->aspace;
/* Top of this task's kernel stack, less PDA_STACKOFFSET of scratch space.
 * Assumes the task struct sits at the base of a TASK_SIZE-sized allocation
 * whose stack grows down from the top — TODO confirm against the task
 * allocator. */
87 pda->kernelstack = (unsigned long)task - PDA_STACKOFFSET + TASK_SIZE;
92 * Initializes the calling CPU's Control Register 4 (CR4).
93 * The bootstrap assembly code has already partially setup this register.
94 * We only touch the bits we care about, leaving the others untouched.
/* NOTE(review): per the per-bit comments below ("Disable"/"Allow"), this
 * mask is presumably passed to a clear-CR4-bits helper (the consuming call
 * is outside this view — verify). Clearing X86_CR4_TSD is what permits
 * RDTSC at user level (CPL 3). */
100 X86_CR4_VME | /* Disable Virtual-8086 support/cruft */
101 X86_CR4_PVI | /* Disable support for VIF flag */
102 X86_CR4_TSD | /* Allow RDTSC instruction at user-level */
103 X86_CR4_DE /* Disable debugging extensions */
108 * Initializes and installs the calling CPU's Global Descriptor Table (GDT).
109 * Each CPU has its own GDT.
114 unsigned int cpu = this_cpu;
116 /* The bootstrap CPU's GDT has already been setup */
/* Copy the boot-time template GDT into this CPU's private copy.
 * NOTE(review): a GDT descriptor's limit field is conventionally
 * GDT_SIZE - 1 (limit is inclusive); confirm the semantics of
 * cpu_gdt_descr[].size. */
118 memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
119 cpu_gdt_descr[cpu].size = GDT_SIZE;
121 /* Install the CPU's GDT */
122 asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
125 * Install the CPU's LDT... Local Descriptor Table.
126 * We have no need for a LDT, so we point it at the NULL descriptor.
/* Loading selector 0 into LDTR marks the LDT as unusable (null selector). */
128 asm volatile("lldt %w0":: "r" (0));
132 * Installs the calling CPU's Interrupt Descriptor Table (IDT).
133 * All CPUs share the same IDT.
139 * The bootstrap CPU has already filled in the IDT table via the
140 * interrupts_init() call in setup.c. All we need to do is tell the CPU
/* Point this CPU's IDTR at the single shared IDT. */
143 asm volatile("lidt %0" :: "m" (idt_descr));
147 * Initializes and installs the calling CPU's Task State Segment (TSS).
152 unsigned int cpu = this_cpu;
153 struct tss_struct *tss = &per_cpu(tss, cpu);
157 * Initialize the CPU's Interrupt Stack Table.
158 * Certain exceptions and interrupts are handled with their own,
159 * known good stack. The IST holds the address of these stacks.
/* BUG FIX: an IST slot holds the RSP value the CPU loads when delivering
 * the exception, and pushes grow DOWNWARD from there (Intel SDM Vol. 3,
 * "Interrupt Stack Table"). Each slot must therefore point to the TOP
 * (one past the end) of its per-CPU stack page, not the base — otherwise
 * the first push lands in the preceding CPU's stack page (or out of
 * bounds for CPU 0). */
161 tss->ist[STACKFAULT_STACK-1] = (unsigned long)&stackfault_stack[cpu][PAGE_SIZE];
162 tss->ist[DOUBLEFAULT_STACK-1] = (unsigned long)&doublefault_stack[cpu][PAGE_SIZE];
163 tss->ist[NMI_STACK-1] = (unsigned long)&nmi_stack[cpu][PAGE_SIZE];
164 tss->ist[DEBUG_STACK-1] = (unsigned long)&debug_stack[cpu][PAGE_SIZE];
165 tss->ist[MCE_STACK-1] = (unsigned long)&mce_stack[cpu][PAGE_SIZE];
168 * Initialize the CPU's I/O permission bitmap.
169 * The <= is required because the CPU will access up to 8 bits beyond
170 * the end of the IO permission bitmap.
/* All bits set => all I/O ports denied to user level by default. */
172 tss->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
173 for (i = 0; i <= IO_BITMAP_LONGS; i++)
174 tss->io_bitmap[i] = ~0UL;
177 * Install the CPU's TSS and load the CPU's Task Register (TR).
178 * Each CPU has its own TSS.
180 set_tss_desc(cpu, tss);
181 asm volatile("ltr %w0":: "r" (GDT_ENTRY_TSS*8));
185 * Initializes various Model Specific Registers (MSRs) of the calling CPU.
191 * Setup the MSRs needed to support the SYSCALL and SYSRET
192 * instructions. Really, you should read the manual to understand these
193 * gems. In summary, STAR and LSTAR specify the CS, SS, and RIP to
194 * install when the SYSCALL instruction is issued. They also specify the
195 * CS and SS to install on SYSRET.
197 * On SYSCALL, the x86_64 CPU control unit uses STAR to load CS and SS and
198 * LSTAR to load RIP. The old RIP is saved in RCX.
200 * On SYSRET, the control unit uses STAR to restore CS and SS.
201 * RIP is loaded from RCX.
203 * SYSCALL_MASK specifies the RFLAGS to clear on SYSCALL.
/* STAR[63:48] is the SYSRET selector base: for a 64-bit SYSRET the CPU
 * loads CS = STAR[63:48]+16 and SS = STAR[63:48]+8, which is why the
 * 32-bit user CS selector is programmed here. STAR[47:32] is the SYSCALL
 * CS (SS = that value + 8). */
205 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | /* SYSRET CS+SS */
206 ((u64)__KERNEL_CS)<<32); /* SYSCALL CS+SS */
207 wrmsrl(MSR_LSTAR, asm_syscall); /* SYSCALL RIP */
208 wrmsrl(MSR_CSTAR, asm_syscall_ignore); /* RIP for compat. mode */
/* Clear TF (trap), DF (direction), IF (interrupts), and 0x3000 = the
 * IOPL bits (RFLAGS bits 12-13) on kernel entry via SYSCALL. */
209 wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
212 * Setup MSRs needed to support the PDA.
213 * pda_init() initialized MSR_GS_BASE already. When the SWAPGS
214 * instruction is issued, the x86_64 control unit atomically swaps
215 * MSR_GS_BASE and MSR_KERNEL_GS_BASE. So, when we call SWAPGS to
216 * exit the kernel, the value in MSR_KERNEL_GS_BASE will be loaded.
217 * User-space will see MSR_FS_BASE and MSR_GS_BASE both set to 0.
219 wrmsrl(MSR_FS_BASE, 0);
220 wrmsrl(MSR_KERNEL_GS_BASE, 0);
224 * Initializes the calling CPU's debug registers.
230 * Clear the CPU's debug registers.
231 * DR[0-3] are Address-Breakpoint Registers
232 * DR[4-5] are reserved and should not be used by software
233 * DR6 is the Debug Status Register
234 * DR7 is the Debug Control Register
/* Writing 0 to DR7 disables all hardware breakpoints; clearing DR6
 * removes any stale debug-status bits left over from firmware/boot. */
236 set_debugreg(0UL, 0);
237 set_debugreg(0UL, 1);
238 set_debugreg(0UL, 2);
239 set_debugreg(0UL, 3);
240 set_debugreg(0UL, 6);
241 set_debugreg(0UL, 7);
/* Top-level per-CPU initialization: runs on every CPU (boot and secondary)
 * and drives each hardware-init step below in order. */
248 * Get a reference to the currently executing task and the ID of the
249 * CPU being initialized. We can't use the normal 'current' mechanism
250 * since it relies on the PDA being initialized, which it isn't for all
251 * CPUs other than the boot CPU (id=0). pda_init() is called below.
253 struct task_struct *me = get_current_via_RSP();
254 unsigned int cpu = me->cpu_id; /* logical ID */
/* Atomically mark this CPU initialized; a second arrival here is a
 * hard error. */
256 if (cpu_test_and_set(cpu, cpu_initialized_map))
257 panic("CPU#%u already initialized!\n", cpu);
258 printk(KERN_DEBUG "Initializing CPU#%u\n", cpu);
/* Ordering note: pda_init() comes first since later steps may use the
 * 'current' mechanism, which reads through the PDA just installed. */
260 pda_init(cpu, me); /* per-cpu data area */
261 identify_cpu(); /* determine cpu features via CPUID */
262 cr4_init(); /* control register 4 */
263 gdt_init(); /* global descriptor table */
264 idt_init(); /* interrupt descriptor table */
265 tss_init(); /* task state segment */
266 msr_init(); /* misc. model specific registers */
267 dbg_init(); /* debug registers */
268 fpu_init(); /* floating point unit */
269 lapic_init(); /* local advanced prog. interrupt controller */
270 time_init(); /* detects CPU frequency, udelay(), etc. */
271 barrier(); /* compiler memory barrier, avoids reordering */