Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


added 32 bit operand support
[palacios.git] / palacios / src / geekos / svm.c
1 #include <geekos/svm.h>
2 #include <geekos/vmm.h>
3
4 #include <geekos/vmcb.h>
5 #include <geekos/vmm_mem.h>
6 #include <geekos/vmm_paging.h>
7 #include <geekos/svm_handler.h>
8
9 #include <geekos/vmm_debug.h>
10 #include <geekos/vm_guest_mem.h>
11
12
13 /* TEMPORARY BECAUSE SVM IS WEIRD */
14 #include <geekos/tss.h>
15 /* ** */
16
17 extern struct vmm_os_hooks * os_hooks;
18
19 extern uint_t cpuid_ecx(uint_t op);
20 extern uint_t cpuid_edx(uint_t op);
21 extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte); 
22 extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
23 extern uint_t launch_svm(vmcb_t * vmcb_addr);
24 extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);
25
26 extern uint_t Get_CR3();
27
28 extern void GetGDTR(void * gdt);
29 extern void GetIDTR(void * idt);
30
31 extern void DisableInts();
32
33 /* Checks machine SVM capability */
34 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
35 int is_svm_capable() {
36   uint_t ret =  cpuid_ecx(CPUID_FEATURE_IDS);
37   uint_t vm_cr_low = 0, vm_cr_high = 0;
38
39
40   if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
41     PrintDebug("SVM Not Available\n");
42     return 0;
43   } 
44
45   Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
46
47   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 1) {
48     PrintDebug("Nested Paging not supported\n");
49   }
50
51   if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
52     return 1;
53   }
54
55   ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);
56
57   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
58     PrintDebug("SVM BIOS Disabled, not unlockable\n");
59   } else {
60     PrintDebug("SVM is locked with a key\n");
61   }
62
63   return 0;
64 }
65
66
67
68 void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
69   reg_ex_t msr;
70   void * host_state;
71
72
73   // Enable SVM on the CPU
74   Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
75   msr.e_reg.low |= EFER_MSR_svm_enable;
76   Set_MSR(EFER_MSR, 0, msr.e_reg.low);
77   
78   PrintDebug("SVM Enabled\n");
79
80
81   // Setup the host state save area
82   host_state = os_hooks->allocate_pages(4);
83   
84   msr.e_reg.high = 0;
85   msr.e_reg.low = (uint_t)host_state;
86
87
88   PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
89   Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
90
91
92
93   // Setup the SVM specific vmm operations
94   vmm_ops->init_guest = &init_svm_guest;
95   vmm_ops->start_guest = &start_svm_guest;
96
97
98   return;
99 }
100
101
102 int init_svm_guest(struct guest_info *info) {
103  
104   PrintDebug("Allocating VMCB\n");
105   info->vmm_data = (void*)Allocate_VMCB();
106
107
108   //PrintDebug("Generating Guest nested page tables\n");
109   //  info->page_tables = NULL;
110   //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
111   //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
112   //PrintDebugPageTables(info->page_tables);
113
114
115   PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
116   Init_VMCB((vmcb_t*)(info->vmm_data), *info);
117   
118   //  info->rip = 0;
119
120   info->vm_regs.rdi = 0;
121   info->vm_regs.rsi = 0;
122   info->vm_regs.rbp = 0;
123   info->vm_regs.rsp = 0;
124   info->vm_regs.rbx = 0;
125   info->vm_regs.rdx = 0;
126   info->vm_regs.rcx = 0;
127   info->vm_regs.rax = 0;
128   
129   return 0;
130 }
131
132
133 // can we start a kernel thread here...
134 int start_svm_guest(struct guest_info *info) {
135
136
137
138   PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
139   //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
140
141   while (1) {
142
143     PrintDebug("SVM Launch Args (vmcb=%x), (info=%x), (vm_regs=%x)\n", info->vmm_data,  &(info->vm_regs));
144     PrintDebug("Launching to RIP: %x\n", info->rip);
145     safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
146     //launch_svm((vmcb_t*)(info->vmm_data));
147     PrintDebug("SVM Returned\n");
148
149     if (handle_svm_exit(info) != 0) {
150       // handle exit code....
151       break;
152     }
153   }
154   return 0;
155 }
156
157
158
159 vmcb_t * Allocate_VMCB() {
160   vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);
161
162
163   memset(vmcb_page, 0, 4096);
164
165   return vmcb_page;
166 }
167
168
/*
 * Initializes a VMCB for a guest starting in real mode at the x86 reset
 * state (CS base 0xffff0000, CR0 = 0x60000010).
 *
 * NOTE(review): vm_info is passed BY VALUE, so the shadow-paging branch
 * below updates only the local copy's shdw_pg_state — the caller's
 * guest_info never sees the new shadow CR3. Confirm this is intended.
 */
void Init_VMCB_Real(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  // Entry point and initial stack come from the caller's configuration.
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;

  // The guest's EFER.SVME must be set for VMRUN to accept this VMCB.
  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1; // intercept nested VMRUN
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1; // ASID 0 is reserved for the host
  guest_state->cr0 = 0x60000010; // CD | NW | ET — the architectural reset value

  // Intercept the faults and events the VMM needs to observe.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // CS mirrors the reset vector: selector f000, base ffff0000.
  guest_state->cs.selector = 0xf000;
  guest_state->cs.limit=0xffff;
  guest_state->cs.base =  0xffff0000;
  guest_state->cs.attrib.raw = 0x9a; // present, code, execute/read

  // Give every data segment the same real-mode-style base/limit.
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    // NOTE(review): base 0xffff0000 and attrib 0x9b (a code-segment type)
    // for data segments look unusual — confirm against the AMD APM
    // real-mode segment descriptions.
    seg->base = 0xffff0000;
    seg->attrib.raw = 0x9b;
    seg->limit = 0xffff;
  }
  
  /* Set GPRs */
  /*
    EDX == 0xfxx
    EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
  */

  // Descriptor tables start empty with a 64K limit.
  guest_state->gdtr.base = 0;
  guest_state->gdtr.limit = 0xffff;
  guest_state->gdtr.attrib.raw = 0x0;

  guest_state->idtr.base = 0;
  guest_state->idtr.limit = 0xffff;
  guest_state->idtr.attrib.raw = 0x0;

  guest_state->ldtr.base = 0;
  guest_state->ldtr.limit = 0xffff;
  guest_state->ldtr.attrib.raw = 0x82; // LDT system-segment type

  guest_state->tr.base = 0;
  guest_state->tr.limit = 0xffff;
  guest_state->tr.attrib.raw = 0x83; // busy 16-bit TSS type

  // If any I/O ports are hooked, build the IOIO permission bitmap:
  // one bit per port; a set bit causes a #VMEXIT on access.
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    // 3 pages: 64K ports / 8 = 8KB, plus a page for the spill-over byte.
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Exit on physical interrupts so the host can service them.
  ctrl_area->instrs.instrs.INTR = 1;

  // also determine if CPU supports nested paging

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    // Identity-map passthrough tables become the initial shadow CR3
    // (page-aligned; low flag bits masked off).
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Trap CR3/CR0 accesses and TLB invalidations so the shadow
    // tables can be kept in sync with the guest's.
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

        
    guest_state->g_pat = 0x7040600070406ULL;

    // Turn on paging (CR0.PG) so the shadow tables take effect.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Nested paging is not implemented yet — this branch is a stub.
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}
310
311
/*
 * Initializes a VMCB for a guest starting at CS:IP = 0:rip with flat,
 * full-limit (~0u) segments. This is the variant registered via
 * init_svm_guest.
 *
 * NOTE(review): vm_info is passed BY VALUE — the shadow CR3 written in
 * the SHADOW_PAGING branch lands only in the local copy, never in the
 * caller's guest_info. Confirm this is intended.
 */
void Init_VMCB(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  // Entry point and initial stack from the caller's configuration.
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;

  // Always trap CR0 reads/writes (unlike Init_VMCB_Real, which only
  // traps CR0 when shadow paging is enabled).
  //ctrl_area->instrs.instrs.CR0 = 1;
  ctrl_area->cr_reads.crs.cr0 = 1;
  ctrl_area->cr_writes.crs.cr0 = 1;

  // Guest EFER.SVME must be set for VMRUN to accept this VMCB.
  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1; // intercept nested VMRUN
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1; // ASID 0 is reserved for the host

  // Intercept the faults and events the VMM needs to observe.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // CS: selector 0, base 0 (selector<<4), maximum limit.
  guest_state->cs.selector = 0x0000;
  guest_state->cs.limit=~0u;
  guest_state->cs.base = guest_state->cs.selector<<4;
  guest_state->cs.attrib.raw = 0xf3;

  // All data segments mirror CS: flat, base = selector<<4 = 0.
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    seg->base = seg->selector << 4;
    seg->attrib.raw = 0xf3;
    seg->limit = ~0u;
  }
  
  // If any I/O ports are hooked, build the IOIO permission bitmap:
  // one bit per port; a set bit causes a #VMEXIT on access.
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    // 3 pages: 64K ports / 8 = 8KB, plus a page for the spill-over byte.
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }


    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Exit on physical interrupts so the host can service them.
  ctrl_area->instrs.instrs.INTR = 1;



  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    // Identity-map passthrough tables become the initial shadow CR3
    // (page-aligned; low flag bits masked off).
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Trap CR3 accesses and TLB invalidations so the shadow tables
    // can be kept in sync (CR0 is already trapped above).
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;


    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    // Turn on paging (CR0.PG) so the shadow tables take effect.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Nested paging is not implemented yet — this branch is a stub.
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }



}
431
/*
 * Initializes a VMCB for a protected-mode guest whose segments mirror
 * the host's flat GDT layout (CS = selector 1, data = selector 2), and
 * whose GDTR/IDTR/TR are copied from the host's live tables.
 *
 * The author's own caveat applies: this was gutted from TVMM and is
 * "probably wrong" — treat every value here as provisional.
 */
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;

  // Entry point and initial stack from the caller's configuration.
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  // CS: GDT entry 1, flat 4GB (G=1, limit 0xfffff), 32-bit (db=1).
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  // All data segments: GDT entry 2, flat 4GB read/write.
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    // Temporary hack: copy the HOST's task register into the guest.
    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; // available 32-bit TSS
    guest_state->tr.attrib.fields.P = 1;
    guest_state->tr.limit = GetTR_Limit();
    guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  // Guest EFER.SVME must be set for VMRUN to accept this VMCB.
  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1; // intercept nested VMRUN
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1; // ASID 0 is reserved for the host


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.crs.cr4 = 1;
  
  // Intercept the faults and events the VMM needs to observe.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  

  // Trap ALL port I/O: the bitmap below is filled with set bits.
  ctrl_area->instrs.instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // NOTE(review): the fill value 0xffffffff is truncated by memset to
    // 0xff (same effect here), and only 2 of the 3 allocated pages are
    // initialized — the third page's contents are undefined. Confirm
    // whether the hardware reads the spill-over byte in page 3.
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  // Exit on physical interrupts so the host can service them.
  ctrl_area->instrs.instrs.INTR = 1;

  
  {
    // Copy the host's live GDTR/IDTR into the guest state.
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    // SGDT layout: bytes 0-1 = limit, bytes 2-5 = 32-bit base.
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}
590
591