Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


ad870fff0c9bebafb4eda3f64d2e285423ee7ea0
[palacios.git] / palacios / src / geekos / svm.c
1 #include <geekos/svm.h>
2 #include <geekos/vmm.h>
3
4 #include <geekos/vmcb.h>
5 #include <geekos/vmm_mem.h>
6 #include <geekos/vmm_paging.h>
7 #include <geekos/svm_handler.h>
8
9 #include <geekos/vmm_debug.h>
10 #include <geekos/vm_guest_mem.h>
11
12
13 /* TEMPORARY BECAUSE SVM IS WEIRD */
14 #include <geekos/tss.h>
15 /* ** */
16
17 extern struct vmm_os_hooks * os_hooks;
18
19 extern uint_t cpuid_ecx(uint_t op);
20 extern uint_t cpuid_edx(uint_t op);
21 extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte); 
22 extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
23 extern uint_t launch_svm(vmcb_t * vmcb_addr);
24 extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);
25
26 extern uint_t Get_CR3();
27
28 extern void GetGDTR(void * gdt);
29 extern void GetIDTR(void * idt);
30
31 extern void DisableInts();
32
33 /* Checks machine SVM capability */
34 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
35 int is_svm_capable() {
36   uint_t ret =  cpuid_ecx(CPUID_FEATURE_IDS);
37   uint_t vm_cr_low = 0, vm_cr_high = 0;
38
39
40   if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
41     PrintDebug("SVM Not Available\n");
42     return 0;
43   } 
44
45   Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
46
47   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 1) {
48     PrintDebug("Nested Paging not supported\n");
49   }
50
51   if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
52     return 1;
53   }
54
55   ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);
56
57   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
58     PrintDebug("SVM BIOS Disabled, not unlockable\n");
59   } else {
60     PrintDebug("SVM is locked with a key\n");
61   }
62
63   return 0;
64 }
65
66
67
68 void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
69   reg_ex_t msr;
70   void * host_state;
71
72
73   // Enable SVM on the CPU
74   Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
75   msr.e_reg.low |= EFER_MSR_svm_enable;
76   Set_MSR(EFER_MSR, 0, msr.e_reg.low);
77   
78   PrintDebug("SVM Enabled\n");
79
80
81   // Setup the host state save area
82   host_state = os_hooks->allocate_pages(4);
83   
84   msr.e_reg.high = 0;
85   msr.e_reg.low = (uint_t)host_state;
86
87
88   PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
89   Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
90
91
92
93   // Setup the SVM specific vmm operations
94   vmm_ops->init_guest = &init_svm_guest;
95   vmm_ops->start_guest = &start_svm_guest;
96
97
98   return;
99 }
100
101
102 int init_svm_guest(struct guest_info *info) {
103  
104   PrintDebug("Allocating VMCB\n");
105   info->vmm_data = (void*)Allocate_VMCB();
106
107
108   //PrintDebug("Generating Guest nested page tables\n");
109   //  info->page_tables = NULL;
110   //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
111   //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
112   //PrintDebugPageTables(info->page_tables);
113
114   
115
116   PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
117   Init_VMCB((vmcb_t*)(info->vmm_data), *info);
118   
119   info->rip = 0;
120
121   info->vm_regs.rdi = 0;
122   info->vm_regs.rsi = 0;
123   info->vm_regs.rbp = 0;
124   info->vm_regs.rsp = 0;
125   info->vm_regs.rbx = 0;
126   info->vm_regs.rdx = 0;
127   info->vm_regs.rcx = 0;
128   info->vm_regs.rax = 0;
129   
130   return 0;
131 }
132
133
134 // can we start a kernel thread here...
135 int start_svm_guest(struct guest_info *info) {
136
137
138
139   PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
140   //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
141
142   while (1) {
143     PrintDebug("SVM Launch Args (vmcb=%x), (info=%x), (vm_regs=%x)\n", info->vmm_data,  &(info->vm_regs));
144     safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
145     //launch_svm((vmcb_t*)(info->vmm_data));
146     PrintDebug("SVM Returned\n");
147
148     if (handle_svm_exit(info) != 0) {
149       // handle exit code....
150       break;
151     }
152   }
153   return 0;
154 }
155
156
157
158 vmcb_t * Allocate_VMCB() {
159   vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);
160
161
162   memset(vmcb_page, 0, 4096);
163
164   return vmcb_page;
165 }
166
167
/*
 * Init_VMCB_Real - configure a VMCB for a guest starting in real mode.
 *
 * Sets guest register/segment state to the x86 reset defaults
 * (CS selector 0xf000, base 0xffff0000, CR0 = 0x60000010), installs the
 * exception and instruction intercepts, builds the I/O permission bitmap
 * from vm_info's hooked ports, and configures paging per vm_info.page_mode.
 *
 * vmcb    - pre-zeroed VMCB page to fill in.
 * vm_info - guest description.  NOTE(review): passed BY VALUE, so the
 *           shadow_cr3 update in the SHADOW_PAGING branch below modifies
 *           only a local copy and is lost on return — confirm callers do
 *           not rely on seeing it.
 */
void Init_VMCB_Real(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  // Initial stack pointer and entry point come from the guest description.
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;

  // EFER.SVME must be set in the guest state for VMRUN to succeed.
  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  // Intercept VMRUN from the guest (required by SVM).
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;
  // CR0 power-on value: ET|NW|CD set, paging and protection off (real mode).
  guest_state->cr0 = 0x60000010;

  // Intercept this set of guest exceptions.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // Real-mode reset state for CS: execution begins at 0xffff0 (fff0 + base).
  guest_state->cs.selector = 0xf000;
  guest_state->cs.limit=0xffff;
  guest_state->cs.base =  0xffff0000;
  guest_state->cs.attrib.raw = 0x9a;

  // All remaining segment registers get identical real-mode settings.
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    seg->base = 0xffff0000;
    seg->attrib.raw = 0x9b;
    seg->limit = 0xffff;
  }
  
  /* Set GPRs */
  /*
    EDX == 0xfxx
    EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
  */

  // Descriptor-table and task registers: empty tables, real-mode limits.
  guest_state->gdtr.base = 0;
  guest_state->gdtr.limit = 0xffff;
  guest_state->gdtr.attrib.raw = 0x0;

  guest_state->idtr.base = 0;
  guest_state->idtr.limit = 0xffff;
  guest_state->idtr.attrib.raw = 0x0;

  guest_state->ldtr.base = 0;
  guest_state->ldtr.limit = 0xffff;
  guest_state->ldtr.attrib.raw = 0x82;

  guest_state->tr.base = 0;
  guest_state->tr.limit = 0xffff;
  guest_state->tr.attrib.raw = 0x83;

  // Build the I/O permission bitmap: one bit per port, set = intercept.
  // 3 pages are allocated (the SVM IOPM spans slightly more than 2 pages).
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    // Set the intercept bit for every hooked port.
    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //    memset((uchar_t*)io_port_bitmap, 0xff, PAGE_SIZE * 2);
    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Intercept external interrupts.
  ctrl_area->instrs.instrs.INTR = 1;

  // also determine if CPU supports nested paging

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    // NOTE(review): this writes into the BY-VALUE copy of vm_info; the
    // caller's shadow_cr3 is not updated (see function comment).
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Intercept CR3 and CR0 accesses so the shadow pager stays in sync.
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    // Intercept TLB-invalidation instructions as well.
    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    // Turn paging on (CR0.PG) since the shadow tables are active.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Nested paging setup is not yet implemented; the intended steps are
    // preserved below for reference.
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}
312
313
/*
 * Init_VMCB - default VMCB setup for a guest (flat zero-based segments).
 *
 * Unlike Init_VMCB_Real, this variant leaves CR0 at its pre-zeroed value,
 * intercepts CR0 reads/writes unconditionally, and gives CS and the data
 * segments selector 0 with base = selector << 4 (i.e. base 0) and maximal
 * limits.  I/O bitmap and paging setup mirror Init_VMCB_Real.
 *
 * vmcb    - pre-zeroed VMCB page to fill in.
 * vm_info - guest description.  NOTE(review): passed BY VALUE, so the
 *           shadow_cr3 update in the SHADOW_PAGING branch modifies only a
 *           local copy and is lost on return — confirm callers do not
 *           rely on seeing it.
 */
void Init_VMCB(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  // Initial stack pointer and entry point come from the guest description.
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;

  // Always intercept CR0 accesses.
  //ctrl_area->instrs.instrs.CR0 = 1;
  ctrl_area->cr_reads.crs.cr0 = 1;
  ctrl_area->cr_writes.crs.cr0 = 1;

  // EFER.SVME must be set in the guest state for VMRUN to succeed.
  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  // Intercept VMRUN from the guest (required by SVM).
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;

  // Intercept this set of guest exceptions.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // CS: selector 0, base = selector << 4 = 0, maximal limit.
  guest_state->cs.selector = 0x0000;
  guest_state->cs.limit=~0u;
  guest_state->cs.base = guest_state->cs.selector<<4;
  guest_state->cs.attrib.raw = 0xf3;

  // All remaining segment registers get the same flat settings.
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    seg->base = seg->selector << 4;
    seg->attrib.raw = 0xf3;
    seg->limit = ~0u;
  }
  
  // Build the I/O permission bitmap: one bit per port, set = intercept.
  // 3 pages are allocated (the SVM IOPM spans slightly more than 2 pages).
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    // Set the intercept bit for every hooked port.
    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Intercept external interrupts.
  ctrl_area->instrs.instrs.INTR = 1;

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    // NOTE(review): this writes into the BY-VALUE copy of vm_info; the
    // caller's shadow_cr3 is not updated (see function comment).
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Intercept CR3 accesses so the shadow pager stays in sync
    // (CR0 is already intercepted above).
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;

    // Intercept TLB-invalidation instructions as well.
    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    // Turn paging on (CR0.PG) since the shadow tables are active.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Nested paging setup is not yet implemented; the intended steps are
    // preserved below for reference.
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}
433
434 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
435   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
436   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
437   uint_t i = 0;
438
439
440   guest_state->rsp = vm_info.vm_regs.rsp;
441   guest_state->rip = vm_info.rip;
442
443
444   /* I pretty much just gutted this from TVMM */
445   /* Note: That means its probably wrong */
446
447   // set the segment registers to mirror ours
448   guest_state->cs.selector = 1<<3;
449   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
450   guest_state->cs.attrib.fields.S = 1;
451   guest_state->cs.attrib.fields.P = 1;
452   guest_state->cs.attrib.fields.db = 1;
453   guest_state->cs.attrib.fields.G = 1;
454   guest_state->cs.limit = 0xfffff;
455   guest_state->cs.base = 0;
456   
457   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
458   for ( i = 0; segregs[i] != NULL; i++) {
459     struct vmcb_selector * seg = segregs[i];
460     
461     seg->selector = 2<<3;
462     seg->attrib.fields.type = 0x2; // Data Segment+read/write
463     seg->attrib.fields.S = 1;
464     seg->attrib.fields.P = 1;
465     seg->attrib.fields.db = 1;
466     seg->attrib.fields.G = 1;
467     seg->limit = 0xfffff;
468     seg->base = 0;
469   }
470
471
472   {
473     /* JRL THIS HAS TO GO */
474     
475     guest_state->tr.selector = GetTR_Selector();
476     guest_state->tr.attrib.fields.type = 0x9; 
477     guest_state->tr.attrib.fields.P = 1;
478     guest_state->tr.limit = GetTR_Limit();
479     guest_state->tr.base = GetTR_Base();// - 0x2000;
480     /* ** */
481   }
482
483
484   /* ** */
485
486
487   guest_state->efer |= EFER_MSR_svm_enable;
488   guest_state->rflags = 0x00000002; // The reserved bit is always 1
489   ctrl_area->svm_instrs.instrs.VMRUN = 1;
490   guest_state->cr0 = 0x00000001;    // PE 
491   ctrl_area->guest_ASID = 1;
492
493
494   //  guest_state->cpl = 0;
495
496
497
498   // Setup exits
499
500   ctrl_area->cr_writes.crs.cr4 = 1;
501   
502   ctrl_area->exceptions.ex_names.de = 1;
503   ctrl_area->exceptions.ex_names.df = 1;
504   ctrl_area->exceptions.ex_names.pf = 1;
505   ctrl_area->exceptions.ex_names.ts = 1;
506   ctrl_area->exceptions.ex_names.ss = 1;
507   ctrl_area->exceptions.ex_names.ac = 1;
508   ctrl_area->exceptions.ex_names.mc = 1;
509   ctrl_area->exceptions.ex_names.gp = 1;
510   ctrl_area->exceptions.ex_names.ud = 1;
511   ctrl_area->exceptions.ex_names.np = 1;
512   ctrl_area->exceptions.ex_names.of = 1;
513   ctrl_area->exceptions.ex_names.nmi = 1;
514
515   
516
517   ctrl_area->instrs.instrs.IOIO_PROT = 1;
518   ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);
519   
520   {
521     reg_ex_t tmp_reg;
522     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
523     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
524   }
525
526   ctrl_area->instrs.instrs.INTR = 1;
527
528   
529   {
530     char gdt_buf[6];
531     char idt_buf[6];
532
533     memset(gdt_buf, 0, 6);
534     memset(idt_buf, 0, 6);
535
536
537     uint_t gdt_base, idt_base;
538     ushort_t gdt_limit, idt_limit;
539     
540     GetGDTR(gdt_buf);
541     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
542     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
543     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
544
545     GetIDTR(idt_buf);
546     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
547     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
548     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
549
550
551     // gdt_base -= 0x2000;
552     //idt_base -= 0x2000;
553
554     guest_state->gdtr.base = gdt_base;
555     guest_state->gdtr.limit = gdt_limit;
556     guest_state->idtr.base = idt_base;
557     guest_state->idtr.limit = idt_limit;
558
559
560   }
561   
562   
563   // also determine if CPU supports nested paging
564   /*
565   if (vm_info.page_tables) {
566     //   if (0) {
567     // Flush the TLB on entries/exits
568     ctrl_area->TLB_CONTROL = 1;
569
570     // Enable Nested Paging
571     ctrl_area->NP_ENABLE = 1;
572
573     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
574
575         // Set the Nested Page Table pointer
576     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
577
578
579     //   ctrl_area->N_CR3 = Get_CR3();
580     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
581
582     guest_state->g_pat = 0x7040600070406ULL;
583
584     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
585     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
586     // Enable Paging
587     //    guest_state->cr0 |= 0x80000000;
588   }
589   */
590
591 }
592
593