Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


real->protected mode switch should work now
[palacios.git] / palacios / src / geekos / svm.c
1 #include <geekos/svm.h>
2 #include <geekos/vmm.h>
3
4 #include <geekos/vmcb.h>
5 #include <geekos/vmm_mem.h>
6 #include <geekos/vmm_paging.h>
7 #include <geekos/svm_handler.h>
8
9 #include <geekos/vmm_debug.h>
10 #include <geekos/vm_guest_mem.h>
11
12
13 /* TEMPORARY BECAUSE SVM IS WEIRD */
14 #include <geekos/tss.h>
15 /* ** */
16
17 extern struct vmm_os_hooks * os_hooks;
18
19 extern uint_t cpuid_ecx(uint_t op);
20 extern uint_t cpuid_edx(uint_t op);
21 extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte); 
22 extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
23 extern uint_t launch_svm(vmcb_t * vmcb_addr);
24 extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);
25
26 extern uint_t Get_CR3();
27
28 extern void GetGDTR(void * gdt);
29 extern void GetIDTR(void * idt);
30
31 extern void DisableInts();
32
33 /* Checks machine SVM capability */
34 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
35 int is_svm_capable() {
36   uint_t ret =  cpuid_ecx(CPUID_FEATURE_IDS);
37   uint_t vm_cr_low = 0, vm_cr_high = 0;
38
39
40   if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
41     PrintDebug("SVM Not Available\n");
42     return 0;
43   } 
44
45   Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
46
47   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 1) {
48     PrintDebug("Nested Paging not supported\n");
49   }
50
51   if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
52     return 1;
53   }
54
55   ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);
56
57   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
58     PrintDebug("SVM BIOS Disabled, not unlockable\n");
59   } else {
60     PrintDebug("SVM is locked with a key\n");
61   }
62
63   return 0;
64 }
65
66
67
68 void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
69   reg_ex_t msr;
70   void * host_state;
71
72
73   // Enable SVM on the CPU
74   Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
75   msr.e_reg.low |= EFER_MSR_svm_enable;
76   Set_MSR(EFER_MSR, 0, msr.e_reg.low);
77   
78   PrintDebug("SVM Enabled\n");
79
80
81   // Setup the host state save area
82   host_state = os_hooks->allocate_pages(4);
83   
84   msr.e_reg.high = 0;
85   msr.e_reg.low = (uint_t)host_state;
86
87
88   PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
89   Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
90
91
92
93   // Setup the SVM specific vmm operations
94   vmm_ops->init_guest = &init_svm_guest;
95   vmm_ops->start_guest = &start_svm_guest;
96
97
98   return;
99 }
100
101
102 int init_svm_guest(struct guest_info *info) {
103  
104   PrintDebug("Allocating VMCB\n");
105   info->vmm_data = (void*)Allocate_VMCB();
106
107
108   //PrintDebug("Generating Guest nested page tables\n");
109   //  info->page_tables = NULL;
110   //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
111   //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
112   //PrintDebugPageTables(info->page_tables);
113
114   
115
116   PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
117   Init_VMCB((vmcb_t*)(info->vmm_data), *info);
118   
119   //  info->rip = 0;
120
121   info->vm_regs.rdi = 0;
122   info->vm_regs.rsi = 0;
123   info->vm_regs.rbp = 0;
124   info->vm_regs.rsp = 0;
125   info->vm_regs.rbx = 0;
126   info->vm_regs.rdx = 0;
127   info->vm_regs.rcx = 0;
128   info->vm_regs.rax = 0;
129   
130   return 0;
131 }
132
133
134 // can we start a kernel thread here...
135 int start_svm_guest(struct guest_info *info) {
136
137
138
139   PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
140   //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
141
142   while (1) {
143
144     PrintDebug("SVM Launch Args (vmcb=%x), (info=%x), (vm_regs=%x)\n", info->vmm_data,  &(info->vm_regs));
145     PrintDebug("Launching to RIP: %x\n", info->rip);
146     safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
147     //launch_svm((vmcb_t*)(info->vmm_data));
148     PrintDebug("SVM Returned\n");
149
150     if (handle_svm_exit(info) != 0) {
151       // handle exit code....
152       break;
153     }
154   }
155   return 0;
156 }
157
158
159
160 vmcb_t * Allocate_VMCB() {
161   vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);
162
163
164   memset(vmcb_page, 0, 4096);
165
166   return vmcb_page;
167 }
168
169
170 void Init_VMCB_Real(vmcb_t * vmcb, struct guest_info vm_info) {
171   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
172   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
173   uint_t i;
174
175
176   guest_state->rsp = vm_info.vm_regs.rsp;
177   guest_state->rip = vm_info.rip;
178
179
180
181
182
183   guest_state->efer |= EFER_MSR_svm_enable;
184   guest_state->rflags = 0x00000002; // The reserved bit is always 1
185   ctrl_area->svm_instrs.instrs.VMRUN = 1;
186   // guest_state->cr0 = 0x00000001;    // PE 
187   ctrl_area->guest_ASID = 1;
188   guest_state->cr0 = 0x60000010;
189
190
191   ctrl_area->exceptions.ex_names.de = 1;
192   ctrl_area->exceptions.ex_names.df = 1;
193   ctrl_area->exceptions.ex_names.pf = 1;
194   ctrl_area->exceptions.ex_names.ts = 1;
195   ctrl_area->exceptions.ex_names.ss = 1;
196   ctrl_area->exceptions.ex_names.ac = 1;
197   ctrl_area->exceptions.ex_names.mc = 1;
198   ctrl_area->exceptions.ex_names.gp = 1;
199   ctrl_area->exceptions.ex_names.ud = 1;
200   ctrl_area->exceptions.ex_names.np = 1;
201   ctrl_area->exceptions.ex_names.of = 1;
202   ctrl_area->exceptions.ex_names.nmi = 1;
203
204   guest_state->cs.selector = 0xf000;
205   guest_state->cs.limit=0xffff;
206   guest_state->cs.base =  0xffff0000;
207   guest_state->cs.attrib.raw = 0x9a;
208
209   
210   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
211   for ( i = 0; segregs[i] != NULL; i++) {
212     struct vmcb_selector * seg = segregs[i];
213     
214     seg->selector = 0x0000;
215     seg->base = 0xffff0000;
216     seg->attrib.raw = 0x9b;
217     seg->limit = 0xffff;
218   }
219   
220   /* Set GPRs */
221   /*
222     EDX == 0xfxx
223     EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
224   */
225
226   guest_state->gdtr.base = 0;
227   guest_state->gdtr.limit = 0xffff;
228   guest_state->gdtr.attrib.raw = 0x0;
229
230   guest_state->idtr.base = 0;
231   guest_state->idtr.limit = 0xffff;
232   guest_state->idtr.attrib.raw = 0x0;
233
234   guest_state->ldtr.base = 0;
235   guest_state->ldtr.limit = 0xffff;
236   guest_state->ldtr.attrib.raw = 0x82;
237
238   guest_state->tr.base = 0;
239   guest_state->tr.limit = 0xffff;
240   guest_state->tr.attrib.raw = 0x83;
241
242
243
244
245   if (vm_info.io_map.num_ports > 0) {
246     vmm_io_hook_t * iter;
247     addr_t io_port_bitmap;
248     
249     io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
250     memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
251     
252     ctrl_area->IOPM_BASE_PA = io_port_bitmap;
253
254     //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);
255
256     FOREACH_IO_HOOK(vm_info.io_map, iter) {
257       ushort_t port = iter->port;
258       uchar_t * bitmap = (uchar_t *)io_port_bitmap;
259
260       bitmap += (port / 8);
261       PrintDebug("Setting Bit in block %x\n", bitmap);
262       *bitmap |= 1 << (port % 8);
263     }
264
265     //    memset((uchar_t*)io_port_bitmap, 0xff, PAGE_SIZE * 2);
266     //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);
267
268     ctrl_area->instrs.instrs.IOIO_PROT = 1;
269   }
270
271   ctrl_area->instrs.instrs.INTR = 1;
272
273   // also determine if CPU supports nested paging
274
275   if (vm_info.page_mode == SHADOW_PAGING) {
276     PrintDebug("Creating initial shadow page table\n");
277     vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
278     PrintDebug("Created\n");
279
280     guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;
281
282     ctrl_area->cr_reads.crs.cr3 = 1;
283     ctrl_area->cr_writes.crs.cr3 = 1;
284     ctrl_area->cr_reads.crs.cr0 = 1;
285     ctrl_area->cr_writes.crs.cr0 = 1;
286
287     ctrl_area->instrs.instrs.INVLPG = 1;
288     ctrl_area->instrs.instrs.INVLPGA = 1;
289
290         
291     guest_state->g_pat = 0x7040600070406ULL;
292
293     guest_state->cr0 |= 0x80000000;
294   } else if (vm_info.page_mode == NESTED_PAGING) {
295     // Flush the TLB on entries/exits
296     //ctrl_area->TLB_CONTROL = 1;
297
298     // Enable Nested Paging
299     //ctrl_area->NP_ENABLE = 1;
300
301     //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
302
303         // Set the Nested Page Table pointer
304     //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
305     // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);
306
307     //   ctrl_area->N_CR3 = Get_CR3();
308     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
309
310     //    guest_state->g_pat = 0x7040600070406ULL;
311   }
312
313 }
314
315
316 void Init_VMCB(vmcb_t * vmcb, struct guest_info vm_info) {
317   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
318   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
319   uint_t i;
320
321
322   guest_state->rsp = vm_info.vm_regs.rsp;
323   guest_state->rip = vm_info.rip;
324
325
326   //ctrl_area->instrs.instrs.CR0 = 1;
327   ctrl_area->cr_reads.crs.cr0 = 1;
328   ctrl_area->cr_writes.crs.cr0 = 1;
329
330   guest_state->efer |= EFER_MSR_svm_enable;
331   guest_state->rflags = 0x00000002; // The reserved bit is always 1
332   ctrl_area->svm_instrs.instrs.VMRUN = 1;
333   // guest_state->cr0 = 0x00000001;    // PE 
334   ctrl_area->guest_ASID = 1;
335
336
337   ctrl_area->exceptions.ex_names.de = 1;
338   ctrl_area->exceptions.ex_names.df = 1;
339   ctrl_area->exceptions.ex_names.pf = 1;
340   ctrl_area->exceptions.ex_names.ts = 1;
341   ctrl_area->exceptions.ex_names.ss = 1;
342   ctrl_area->exceptions.ex_names.ac = 1;
343   ctrl_area->exceptions.ex_names.mc = 1;
344   ctrl_area->exceptions.ex_names.gp = 1;
345   ctrl_area->exceptions.ex_names.ud = 1;
346   ctrl_area->exceptions.ex_names.np = 1;
347   ctrl_area->exceptions.ex_names.of = 1;
348   ctrl_area->exceptions.ex_names.nmi = 1;
349
350   guest_state->cs.selector = 0x0000;
351   guest_state->cs.limit=~0u;
352   guest_state->cs.base = guest_state->cs.selector<<4;
353   guest_state->cs.attrib.raw = 0xf3;
354
355   
356   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
357   for ( i = 0; segregs[i] != NULL; i++) {
358     struct vmcb_selector * seg = segregs[i];
359     
360     seg->selector = 0x0000;
361     seg->base = seg->selector << 4;
362     seg->attrib.raw = 0xf3;
363     seg->limit = ~0u;
364   }
365   
366   if (vm_info.io_map.num_ports > 0) {
367     vmm_io_hook_t * iter;
368     addr_t io_port_bitmap;
369     
370     io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
371     memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
372     
373     ctrl_area->IOPM_BASE_PA = io_port_bitmap;
374
375     //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);
376
377     FOREACH_IO_HOOK(vm_info.io_map, iter) {
378       ushort_t port = iter->port;
379       uchar_t * bitmap = (uchar_t *)io_port_bitmap;
380
381       bitmap += (port / 8);
382       PrintDebug("Setting Bit in block %x\n", bitmap);
383       *bitmap |= 1 << (port % 8);
384     }
385
386
387     //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);
388
389     ctrl_area->instrs.instrs.IOIO_PROT = 1;
390   }
391
392   ctrl_area->instrs.instrs.INTR = 1;
393
394
395
396   if (vm_info.page_mode == SHADOW_PAGING) {
397     PrintDebug("Creating initial shadow page table\n");
398     vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
399     PrintDebug("Created\n");
400
401     guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;
402
403     ctrl_area->cr_reads.crs.cr3 = 1;
404     ctrl_area->cr_writes.crs.cr3 = 1;
405
406
407     ctrl_area->instrs.instrs.INVLPG = 1;
408     ctrl_area->instrs.instrs.INVLPGA = 1;
409
410     guest_state->g_pat = 0x7040600070406ULL;
411
412     guest_state->cr0 |= 0x80000000;
413   } else if (vm_info.page_mode == NESTED_PAGING) {
414     // Flush the TLB on entries/exits
415     //ctrl_area->TLB_CONTROL = 1;
416
417     // Enable Nested Paging
418     //ctrl_area->NP_ENABLE = 1;
419
420     //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
421
422         // Set the Nested Page Table pointer
423     //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
424     // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);
425
426     //   ctrl_area->N_CR3 = Get_CR3();
427     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
428
429     //    guest_state->g_pat = 0x7040600070406ULL;
430   }
431
432
433
434 }
435
436 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
437   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
438   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
439   uint_t i = 0;
440
441
442   guest_state->rsp = vm_info.vm_regs.rsp;
443   guest_state->rip = vm_info.rip;
444
445
446   /* I pretty much just gutted this from TVMM */
447   /* Note: That means its probably wrong */
448
449   // set the segment registers to mirror ours
450   guest_state->cs.selector = 1<<3;
451   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
452   guest_state->cs.attrib.fields.S = 1;
453   guest_state->cs.attrib.fields.P = 1;
454   guest_state->cs.attrib.fields.db = 1;
455   guest_state->cs.attrib.fields.G = 1;
456   guest_state->cs.limit = 0xfffff;
457   guest_state->cs.base = 0;
458   
459   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
460   for ( i = 0; segregs[i] != NULL; i++) {
461     struct vmcb_selector * seg = segregs[i];
462     
463     seg->selector = 2<<3;
464     seg->attrib.fields.type = 0x2; // Data Segment+read/write
465     seg->attrib.fields.S = 1;
466     seg->attrib.fields.P = 1;
467     seg->attrib.fields.db = 1;
468     seg->attrib.fields.G = 1;
469     seg->limit = 0xfffff;
470     seg->base = 0;
471   }
472
473
474   {
475     /* JRL THIS HAS TO GO */
476     
477     guest_state->tr.selector = GetTR_Selector();
478     guest_state->tr.attrib.fields.type = 0x9; 
479     guest_state->tr.attrib.fields.P = 1;
480     guest_state->tr.limit = GetTR_Limit();
481     guest_state->tr.base = GetTR_Base();// - 0x2000;
482     /* ** */
483   }
484
485
486   /* ** */
487
488
489   guest_state->efer |= EFER_MSR_svm_enable;
490   guest_state->rflags = 0x00000002; // The reserved bit is always 1
491   ctrl_area->svm_instrs.instrs.VMRUN = 1;
492   guest_state->cr0 = 0x00000001;    // PE 
493   ctrl_area->guest_ASID = 1;
494
495
496   //  guest_state->cpl = 0;
497
498
499
500   // Setup exits
501
502   ctrl_area->cr_writes.crs.cr4 = 1;
503   
504   ctrl_area->exceptions.ex_names.de = 1;
505   ctrl_area->exceptions.ex_names.df = 1;
506   ctrl_area->exceptions.ex_names.pf = 1;
507   ctrl_area->exceptions.ex_names.ts = 1;
508   ctrl_area->exceptions.ex_names.ss = 1;
509   ctrl_area->exceptions.ex_names.ac = 1;
510   ctrl_area->exceptions.ex_names.mc = 1;
511   ctrl_area->exceptions.ex_names.gp = 1;
512   ctrl_area->exceptions.ex_names.ud = 1;
513   ctrl_area->exceptions.ex_names.np = 1;
514   ctrl_area->exceptions.ex_names.of = 1;
515   ctrl_area->exceptions.ex_names.nmi = 1;
516
517   
518
519   ctrl_area->instrs.instrs.IOIO_PROT = 1;
520   ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);
521   
522   {
523     reg_ex_t tmp_reg;
524     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
525     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
526   }
527
528   ctrl_area->instrs.instrs.INTR = 1;
529
530   
531   {
532     char gdt_buf[6];
533     char idt_buf[6];
534
535     memset(gdt_buf, 0, 6);
536     memset(idt_buf, 0, 6);
537
538
539     uint_t gdt_base, idt_base;
540     ushort_t gdt_limit, idt_limit;
541     
542     GetGDTR(gdt_buf);
543     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
544     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
545     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
546
547     GetIDTR(idt_buf);
548     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
549     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
550     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
551
552
553     // gdt_base -= 0x2000;
554     //idt_base -= 0x2000;
555
556     guest_state->gdtr.base = gdt_base;
557     guest_state->gdtr.limit = gdt_limit;
558     guest_state->idtr.base = idt_base;
559     guest_state->idtr.limit = idt_limit;
560
561
562   }
563   
564   
565   // also determine if CPU supports nested paging
566   /*
567   if (vm_info.page_tables) {
568     //   if (0) {
569     // Flush the TLB on entries/exits
570     ctrl_area->TLB_CONTROL = 1;
571
572     // Enable Nested Paging
573     ctrl_area->NP_ENABLE = 1;
574
575     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
576
577         // Set the Nested Page Table pointer
578     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
579
580
581     //   ctrl_area->N_CR3 = Get_CR3();
582     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
583
584     guest_state->g_pat = 0x7040600070406ULL;
585
586     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
587     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
588     // Enable Paging
589     //    guest_state->cr0 |= 0x80000000;
590   }
591   */
592
593 }
594
595