Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
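For instance, assuming a release branch named Release-1.2 exists on the server (run "git branch -r" inside the clone to list the actual remote branches), it can be tracked in the same way:

  git checkout --track -b Release-1.2 origin/Release-1.2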


palacios/src/geekos/svm.c (palacios.git, commit f3865caf01d9ffa44b39eec8fd5202637501d66b)
#include <geekos/svm.h>
#include <geekos/vmm.h>

#include <geekos/vmcb.h>
#include <geekos/vmm_mem.h>
#include <geekos/vmm_paging.h>
#include <geekos/svm_handler.h>

#include <geekos/vmm_debug.h>


/* TEMPORARY BECAUSE SVM IS WEIRD */
#include <geekos/tss.h>
/* ** */

extern struct vmm_os_hooks * os_hooks;

extern uint_t cpuid_ecx(uint_t op);
extern uint_t cpuid_edx(uint_t op);
extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte);
extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
extern uint_t launch_svm(vmcb_t * vmcb_addr);
extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);

extern uint_t Get_CR3();

extern void GetGDTR(void * gdt);
extern void GetIDTR(void * idt);

extern void DisableInts();

/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int is_svm_capable() {
  uint_t ret = cpuid_ecx(CPUID_FEATURE_IDS);
  uint_t vm_cr_low = 0, vm_cr_high = 0;


  if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
    PrintDebug("SVM Not Available\n");
    return 0;
  }

  Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

  // SVM is available and has not been disabled in VM_CR
  if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
    return 1;
  }

  // SVM is disabled in VM_CR; query the SVM feature flags for details
  ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);


  if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
    PrintDebug("Nested Paging not supported\n");
  }

  if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
    PrintDebug("SVM BIOS Disabled, not unlockable\n");
  } else {
    PrintDebug("SVM is locked with a key\n");
  }

  return 0;
}


void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
  reg_ex_t msr;
  void * host_state;


  // Enable SVM on the CPU
  Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
  msr.e_reg.low |= EFER_MSR_svm_enable;
  Set_MSR(EFER_MSR, 0, msr.e_reg.low);

  PrintDebug("SVM Enabled\n");


  // Setup the host state save area
  host_state = os_hooks->allocate_pages(4);

  msr.e_reg.high = 0;
  msr.e_reg.low = (uint_t)host_state;


  PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
  Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);



  // Setup the SVM specific vmm operations
  vmm_ops->init_guest = &init_svm_guest;
  vmm_ops->start_guest = &start_svm_guest;


  return;
}


int init_svm_guest(struct guest_info *info) {

  PrintDebug("Allocating VMCB\n");
  info->vmm_data = (void*)Allocate_VMCB();


  //PrintDebug("Generating Guest nested page tables\n");
  //  info->page_tables = NULL;
  //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
  //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
  //PrintDebugPageTables(info->page_tables);



  PrintDebug("Initializing VMCB (addr=%x)\n", (uint_t)info->vmm_data);
  Init_VMCB((vmcb_t*)(info->vmm_data), *info);


  info->vm_regs.rbx = 0;
  info->vm_regs.rcx = 0;
  info->vm_regs.rdx = 0;
  info->vm_regs.rsi = 0;
  info->vm_regs.rdi = 0;
  info->vm_regs.rbp = 0;

  return 0;
}


// can we start a kernel thread here...
int start_svm_guest(struct guest_info *info) {



  PrintDebug("Launching SVM VM (vmcb=%x)\n", (uint_t)info->vmm_data);
  //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

  while (1) {

    safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
    //launch_svm((vmcb_t*)(info->vmm_data));
    PrintDebug("SVM Returned\n");

    if (handle_svm_exit(info) != 0) {
      break;
    }
  }
  return 0;
}


vmcb_t * Allocate_VMCB() {
  vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);


  memset(vmcb_page, 0, 4096);

  return vmcb_page;
}


void Init_VMCB_Real(vmcb_t * vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;


  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;




  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;
  guest_state->cr0 = 0x60000010;


  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // Real-mode reset state for CS (selector 0xf000, base 0xffff0000)
  guest_state->cs.selector = 0xf000;
  guest_state->cs.limit = 0xffff;
  guest_state->cs.base = 0xffff0000;
  guest_state->cs.attrib.raw = 0x9a;


  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 0x0000;
    seg->base = 0xffff0000;
    seg->attrib.raw = 0x9b;
    seg->limit = 0xffff;
  }

  /* Set GPRs */
  /*
    EDX == 0xfxx
    EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
  */

  guest_state->gdtr.base = 0;
  guest_state->gdtr.limit = 0xffff;
  guest_state->gdtr.attrib.raw = 0x0;

  guest_state->idtr.base = 0;
  guest_state->idtr.limit = 0xffff;
  guest_state->idtr.attrib.raw = 0x0;

  guest_state->ldtr.base = 0;
  guest_state->ldtr.limit = 0xffff;
  guest_state->ldtr.attrib.raw = 0x82;

  guest_state->tr.base = 0;
  guest_state->tr.limit = 0xffff;
  guest_state->tr.attrib.raw = 0x83;




  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;

    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);

    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //    memset((uchar_t*)io_port_bitmap, 0xff, PAGE_SIZE * 2);
    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  ctrl_area->instrs.instrs.INTR = 1;

  // also determine if CPU supports nested paging

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shadow_page_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&(vm_info.mem_map)) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shadow_page_state.shadow_cr3.r_reg;

    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;


    guest_state->g_pat = 0x7040600070406ULL;

    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}


void Init_VMCB(vmcb_t * vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;


  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;



  ctrl_area->cr_writes.crs.cr0 = 1;

  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  guest_state->cs.selector = 0x0000;
  guest_state->cs.limit = ~0u;
  guest_state->cs.base = guest_state->cs.selector << 4;
  guest_state->cs.attrib.raw = 0xf3;


  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 0x0000;
    seg->base = seg->selector << 4;
    seg->attrib.raw = 0xf3;
    seg->limit = ~0u;
  }

  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;

    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);

    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }


    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  ctrl_area->instrs.instrs.INTR = 1;



  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shadow_page_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&(vm_info.mem_map)) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shadow_page_state.shadow_cr3.r_reg;

    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;
    ctrl_area->instrs.instrs.CR0 = 1;



    guest_state->g_pat = 0x7040600070406ULL;

    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }



}

void Init_VMCB_pe(vmcb_t *vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    guest_state->tr.limit = GetTR_Limit();
    guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.crs.cr4 = 1;

  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;



  ctrl_area->instrs.instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // Set every bit in the IO permission map so that all port accesses are intercepted
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}