2 * setup.c: Setup the world for vmxassist.
4 * Leendert van Doorn, leendert@watson.ibm.com
5 * Copyright (c) 2005, International Business Machines Corporation.
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18 * Place - Suite 330, Boston, MA 02111-1307 USA.
24 #if (VMXASSIST_BASE != TEXTADDR)
25 #error VMXAssist base mismatch
/* Number of unsigned-sized slots that fit in PGSIZE bytes; dimensions pgd[]. */
28 #define NR_PGD (PGSIZE / sizeof(unsigned))
/*
 * Yields the smaller of a and b. Each argument is expanded twice, so
 * callers must pass expressions without side effects.
 */
30 #define min(a, b) ((a) > (b) ? (b) : (a))
32 /* Which CPU are we booting, and what is the initial CS segment? */
/*
 * booting_cpu == 0 selects the ROM BIOS POST entry (CS = 0xF000) when real
 * mode is first entered; any other value is treated as an AP whose CS is
 * booting_vector << 8.
 */
33 int booting_cpu, booting_vector;
/*
 * Descriptor table that is later loaded with lgdt. The per-entry comments
 * give each slot's byte offset. The TSS slot (0x08) is stored with all base
 * bits clear; the address of tss is OR-ed into it at run time.
 */
35 unsigned long long gdt[] __attribute__ ((aligned(32))) = {
36 0x0000000000000000ULL, /* 0x00: reserved */
37 0x0000890000000000ULL, /* 0x08: 32-bit TSS */
38 0x00CF9A000000FFFFULL, /* 0x10: CS 32-bit */
39 0x00CF92000000FFFFULL, /* 0x18: DS 32-bit */
/*
 * Operand handed to lgdt: sizeof(gdt)-1 followed by the address of gdt[].
 */
42 struct dtr gdtr = { sizeof(gdt)-1, (unsigned long) &gdt };
/* Task state segment; cleared with memset and filled in before ltr runs. */
44 struct tss tss __attribute__ ((aligned(4)));
/* One 64-bit gate per trap (NR_TRAPS entries), written by set_intr_gate(). */
46 unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
/* Operand handed to lidt: sizeof(idt)-1 followed by the address of idt[]. */
48 struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
/*
 * Page directory with NR_PGD entries. It is aligned to PGSIZE because its
 * address is checked against PGMASK and then passed to set_cr3().
 */
51 unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
/*
 * Template memory map. Each initializer is presumably { addr, size, type }
 * (the code that patches the table uses the member names addr and size).
 * Entries 4 through 7 contain placeholder values that are adjusted once
 * memory_size is known; the finished table is then copied to E820_MAP.
 */
53 struct e820entry e820map[] = {
54 { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
55 { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
56 { 0x00000000000A0000ULL, 0x0000000000020000ULL, E820_IO },
57 { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
/* [4]: size is computed at run time from memory_size. */
58 { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
/* [5], [6]: addr is overwritten at run time. */
59 { 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED_PAGE },
60 { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
/* [7]: memory_size is added to the addr given here. */
61 { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
62 { 0x00000000FEC00000ULL, 0x0000000001400000ULL, E820_IO },
/*
 * Two assist contexts. newctx is the one this file zeroes and populates
 * (entry point, stack, control registers, segment state).
 */
66 struct vmx_assist_context oldctx;
67 struct vmx_assist_context newctx;
/*
 * Memory size in bytes (it is printed in MB via >> 20), computed from
 * get_cmos() readings.
 */
69 unsigned long memory_size;
/*
 * Set to 1 just before CR0.PE is cleared to start the BIOS or an AP;
 * tested and reset to 0 by enter_real_mode().
 */
70 int initialize_real_mode;
/*
 * Declared extern here: stack_top provides the initial stack pointer
 * values, trap_handlers[] the handler address for each IDT slot.
 */
72 extern char stack[], stack_top[];
73 extern unsigned trap_handlers[];
78 printf("VMXAssist (%s)\n", __DATE__);
80 /* Memory size as conveyed by Bochs through CMOS registers */
/*
 * CMOS 0x34 (low byte) and 0x35 (high byte) form a 16-bit count. The
 * << 6 here and the << 10 below scale it by 64 KB in total; the scaled
 * intermediate is clamped to 0x3bc000 and 0xF00000 (15 MB) is added.
 */
81 memory_size = ((get_cmos(0x35) << 8) | get_cmos(0x34)) << 6;
82 if (memory_size > 0x3bc000)
83 memory_size = 0x3bc000;
84 memory_size = (memory_size << 10) + 0xF00000;
/*
 * Nothing was reported above the 0xF00000 base: use CMOS 0x30/0x31
 * instead, adding 0x400 before scaling by 1 KB.
 * NOTE(review): the line holding the left-hand side of this assignment
 * is not present in this listing.
 */
85 if (memory_size <= 0xF00000)
87 (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
88 memory_size += 0x400 << 10; /* + 1MB */
91 /* Create an SMAP for our debug environment */
/*
 * Patch the placeholder entries: [4] ends one page (PGSIZE) below
 * memory_size, [5] starts at that last page, [6] starts at memory_size
 * and [7] is moved up by memory_size.
 */
92 e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
93 e820map[5].addr = memory_size - PGSIZE;
94 e820map[6].addr = memory_size;
95 e820map[7].addr += memory_size;
/* Publish the entry count and a copy of the table. */
97 *E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]);
98 memcpy(E820_MAP, e820map, sizeof(e820map));
101 printf("Memory size %ld MB\n", memory_size >> 20);
102 printf("E820 map:\n");
103 print_e820_map(E820_MAP, *E820_MAP_NR);
/* Refuse to continue if any bit outside PGMASK is set in the pgd address. */
113 if (((unsigned)pgd & ~PGMASK) != 0)
114 panic("PGD not page aligned");
/* CR4_PSE is enabled before the PTE_PS entries below are used. */
115 set_cr4(get_cr4() | CR4_PSE);
/*
 * Entry i maps address i * LPGSIZE with present, writable and user bits
 * set. This is a 1:1 mapping, assuming LPGSIZE equals the span covered
 * by one directory entry (TODO confirm).
 */
116 for (i = 0; i < NR_PGD; i++)
117 pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
/* Load the directory, then turn on protection and paging together. */
118 set_cr3((unsigned) pgd);
119 set_cr0(get_cr0() | (CR0_PE|CR0_PG));
/* Address of tss, widened so it can be shifted into a 64-bit descriptor. */
126 unsigned long long addr = (unsigned long long) &tss;
128 /* setup task state segment */
129 memset(&tss, 0, sizeof(tss));
130 tss.ss0 = DATA_SELECTOR;
/* Leave 4*4 bytes unused below stack_top. */
131 tss.esp0 = (unsigned) stack_top - 4*4;
/* The I/O bitmap is the iomap member of the TSS itself. */
132 tss.iomap_base = offsetof(struct tss, iomap);
134 /* initialize gdt's tss selector */
/*
 * The address is scattered over the split base fields of the descriptor:
 * bits 31..24 go to bits 63..56, bits 23..16 to bits 39..32 and
 * bits 15..0 to bits 31..16.
 * NOTE(review): the expression ends in a trailing | here; its final
 * operand is not present in this listing.
 */
135 gdt[TSS_SELECTOR / sizeof(gdt[0])] |=
136 ((addr & 0xFF000000) << (56-24)) |
137 ((addr & 0x00FF0000) << (32-16)) |
138 ((addr & 0x0000FFFF) << (16)) |
141 /* switch to our own gdt and set current tss */
142 __asm__ __volatile__ ("lgdt %0" : : "m" (gdtr));
/* Reload data segment registers with DATA_SELECTOR (passed in eax). */
143 __asm__ __volatile__ ("movl %%eax,%%ds;"
147 "movl %%eax,%%ss" : : "a" (DATA_SELECTOR));
/* Far jump to the next instruction so CS is reloaded with CODE_SELECTOR. */
149 __asm__ __volatile__ ("ljmp %0,$1f; 1:" : : "i" (CODE_SELECTOR));
/* Load the task register with the TSS selector. */
151 __asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_SELECTOR));
/*
 * Write the gate for trap number i into idt[i].
 *
 * i:       index into idt[].
 * handler: 32-bit address of the handler.
 *
 * Layout produced: handler bits 31..16 are moved to descriptor bits 63..48,
 * handler bits 15..0 stay in bits 15..0, 0x8E00 occupies bits 47..32 and
 * CODE_SELECTOR is placed starting at bit 16.
 */
155 set_intr_gate(int i, unsigned handler)
157 unsigned long long addr = handler;
159 idt[i] = ((addr & 0xFFFF0000ULL) << 32) | (0x8E00ULL << 32) |
160 (addr & 0xFFFFULL) | (CODE_SELECTOR << 16);
/* Install one gate per trap_handlers[] entry, then load the table with lidt. */
168 for (i = 0; i < NR_TRAPS; i++)
169 set_intr_gate(i, trap_handlers[i]);
170 __asm__ __volatile__ ("lidt %0" : : "m" (idtr));
/*
 * Program both interrupt controllers. Each one receives a 0x11 command on
 * its CMD port followed by three writes to its IMR port.
 * NOTE(review): those three writes look like the 8259 ICW2..ICW4 sequence
 * (vector base, cascade wiring, mode) - confirm against the 8259A datasheet.
 */
176 /* mask all interrupts */
177 outb(PIC_MASTER + PIC_IMR, 0xFF);
178 outb(PIC_SLAVE + PIC_IMR, 0xFF);
180 /* setup master PIC */
181 outb(PIC_MASTER + PIC_CMD, 0x11); /* edge triggered, cascade, ICW4 */
182 outb(PIC_MASTER + PIC_IMR, NR_EXCEPTION_HANDLER);
183 outb(PIC_MASTER + PIC_IMR, 1 << 2); /* slave on channel 2 */
184 outb(PIC_MASTER + PIC_IMR, 0x01);
186 /* setup slave PIC */
187 outb(PIC_SLAVE + PIC_CMD, 0x11); /* edge triggered, cascade, ICW4 */
/* The slave is given a value 8 above the one written to the master. */
188 outb(PIC_SLAVE + PIC_IMR, NR_EXCEPTION_HANDLER + 8);
189 outb(PIC_SLAVE + PIC_IMR, 0x02); /* slave identity is 2 */
190 outb(PIC_SLAVE + PIC_IMR, 0x01);
192 /* enable all interrupts */
193 outb(PIC_MASTER + PIC_IMR, 0);
194 outb(PIC_SLAVE + PIC_IMR, 0);
/* Set the bit for port in the TSS I/O bitmap: byte port >> 3, bit port & 7. */
200 tss.iomap[port >> 3] |= 1 << (port & 7);
/*
 * Adjust the saved register frame so execution continues in virtual-8086
 * mode.
 *
 * regs: saved register frame that is modified in place.
 *
 * When initialize_real_mode is set, the flag is cleared, the v86 segment
 * registers are pointed at 0xF000, CS is chosen from booting_cpu and
 * booting_vector, and the PIC ports are marked in the TSS I/O bitmap.
 * The statements under "go from protected to real mode" set EFLAGS_VM and
 * request VM86_PROTECTED_TO_REAL.
 * NOTE(review): the lines separating these two paths, and the eip
 * assignments, are not present in this listing.
 */
204 enter_real_mode(struct regs *regs)
206 /* mask off TSS busy bit */
/* The mask clears bit 41 of the TSS descriptor. */
207 gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
209 /* start 8086 emulation of BIOS */
210 if (initialize_real_mode) {
/* One-shot: the flag is consumed here. */
211 initialize_real_mode = 0;
212 regs->eflags |= EFLAGS_VM | 0x02;
213 regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
214 if (booting_cpu == 0) {
215 regs->cs = 0xF000; /* ROM BIOS POST entry point */
222 regs->cs = booting_vector << 8; /* AP entry point */
228 /* intercept accesses to the PIC */
229 setiomap(PIC_MASTER+PIC_CMD);
230 setiomap(PIC_MASTER+PIC_IMR);
231 setiomap(PIC_SLAVE+PIC_CMD);
232 setiomap(PIC_SLAVE+PIC_IMR);
234 printf("Starting emulated 16-bit real-mode: ip=%04x:%04x\n",
235 regs->cs, regs->eip);
/* mode is assigned directly first so that set_mode sees VM86_REAL as the old mode. */
237 mode = VM86_REAL; /* becomes previous mode */
238 set_mode(regs, VM86_REAL);
240 /* this should get us into 16-bit mode */
243 /* go from protected to real mode */
244 regs->eflags |= EFLAGS_VM;
246 set_mode(regs, VM86_PROTECTED_TO_REAL);
253 * Setup the environment for VMX assist.
254 * This environment consists of flat segments (code and data),
255 * its own gdt, idt, and tr.
/* All fields are written through c, which aliases the global newctx. */
260 struct vmx_assist_context *c = &newctx;
/* Start from an all-zero context. */
262 memset(c, 0, sizeof(*c));
/* Execution begins at switch_to_real_mode, 4*4 bytes below stack_top. */
263 c->eip = (unsigned long) switch_to_real_mode;
264 c->esp = (unsigned) stack_top - 4*4;
265 c->eflags = 0x2; /* no interrupts, please */
268 * Obviously, vmx assist is not running with CR0_PE disabled.
269 * The reason why the vmx assist cr0 has CR0.PE disabled is
270 * that a transition to CR0.PE causes a world switch. It seems
271 * more natural to enable CR0.PE to cause a world switch to
272 * protected mode rather than disabling it.
/*
 * NOTE(review): there are two assignments to c->cr0 (with and without
 * CR0_PG). They are presumably alternatives selected by a preprocessor
 * conditional that is not present in this listing. Both clear CR0_PE and
 * set CR0_NE.
 */
275 c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
276 c->cr3 = (unsigned long) pgd;
278 c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
/* Descriptor-table registers mirror the idt[] and gdt[] of this file. */
283 c->idtr_limit = sizeof(idt)-1;
284 c->idtr_base = (unsigned long) &idt;
286 c->gdtr_limit = sizeof(gdt)-1;
287 c->gdtr_base = (unsigned long) &gdt;
/* Code segment: CODE_SELECTOR, limit 0xFFFFFFFF, seg_type 0xb. */
289 c->cs_sel = CODE_SELECTOR;
290 c->cs_limit = 0xFFFFFFFF;
292 c->cs_arbytes.fields.seg_type = 0xb;
293 c->cs_arbytes.fields.s = 1;
294 c->cs_arbytes.fields.dpl = 0;
295 c->cs_arbytes.fields.p = 1;
296 c->cs_arbytes.fields.avl = 0;
297 c->cs_arbytes.fields.default_ops_size = 1;
298 c->cs_arbytes.fields.g = 1;
/* Data segment: same access bytes as CS except seg_type, which becomes 0x3. */
300 c->ds_sel = DATA_SELECTOR;
301 c->ds_limit = 0xFFFFFFFF;
303 c->ds_arbytes = c->cs_arbytes;
304 c->ds_arbytes.fields.seg_type = 0x3;
/* ES, SS, FS and GS use the DS selector, limit and access bytes. */
306 c->es_sel = DATA_SELECTOR;
307 c->es_limit = 0xFFFFFFFF;
309 c->es_arbytes = c->ds_arbytes;
311 c->ss_sel = DATA_SELECTOR;
312 c->ss_limit = 0xFFFFFFFF;
314 c->ss_arbytes = c->ds_arbytes;
316 c->fs_sel = DATA_SELECTOR;
317 c->fs_limit = 0xFFFFFFFF;
319 c->fs_arbytes = c->ds_arbytes;
321 c->gs_sel = DATA_SELECTOR;
322 c->gs_limit = 0xFFFFFFFF;
324 c->gs_arbytes = c->ds_arbytes;
/* Task register: describes the tss object, sized sizeof(tss) - 1. */
326 c->tr_sel = TSS_SELECTOR;
327 c->tr_limit = sizeof(tss) - 1;
328 c->tr_base = (unsigned long) &tss;
329 c->tr_arbytes.fields.seg_type = 0xb; /* 0x9 | 0x2 (busy) */
330 c->tr_arbytes.fields.s = 0;
331 c->tr_arbytes.fields.dpl = 0;
332 c->tr_arbytes.fields.p = 1;
333 c->tr_arbytes.fields.avl = 0;
334 c->tr_arbytes.fields.default_ops_size = 0;
335 c->tr_arbytes.fields.g = 0;
/*
 * LDTR access bytes: copied from DS, then every field listed below is
 * overwritten (seg_type becomes 0x2).
 */
340 c->ldtr_arbytes = c->ds_arbytes;
341 c->ldtr_arbytes.fields.seg_type = 0x2;
342 c->ldtr_arbytes.fields.s = 0;
343 c->ldtr_arbytes.fields.dpl = 0;
344 c->ldtr_arbytes.fields.p = 1;
345 c->ldtr_arbytes.fields.avl = 0;
346 c->ldtr_arbytes.fields.default_ops_size = 0;
347 c->ldtr_arbytes.fields.g = 0;
351 * Start BIOS by causing a world switch to vmxassist, which causes
352 * VM8086 to be enabled and control is transferred to F000:FFF0.
/*
 * Announce what is being started. For an AP the printed address is
 * booting_vector << 12, i.e. 16 times the CS value booting_vector << 8
 * that enter_real_mode() assigns.
 * NOTE(review): the line separating the two printf calls (presumably an
 * else) is not present in this listing.
 */
357 if (booting_cpu == 0)
358 printf("Start BIOS ...\n");
360 printf("Start AP %d from %08x ...\n",
361 booting_cpu, booting_vector << 12);
/*
 * Arm the one-shot flag, then clear CR0.PE. Control is not expected to
 * come back from that write; if it does, panic.
 */
363 initialize_real_mode = 1;
364 set_cr0(get_cr0() & ~CR0_PE);
365 panic("vmxassist returned"); /* "cannot happen" */
371 printf("Hello from VMXAssist\n");
373 if (booting_cpu == 0)
384 set_cr4(get_cr4() | CR4_VME);
389 if (booting_cpu == 0)