Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
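For example, a release branch can be checked out in the same fashion. The branch name below is only illustrative; run git branch -r first to see which remote branches actually exist.

  cd palacios
  git branch -r
  git checkout --track -b Release-1.0 origin/Release-1.0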


Commit: ab2a406458e7ce7a404c38c28880c7ba1a32f0d6
File:   palacios/src/geekos/svm.c
#include <geekos/svm.h>
#include <geekos/vmm.h>

#include <geekos/vmcb.h>
#include <geekos/vmm_mem.h>
#include <geekos/vmm_paging.h>
#include <geekos/svm_handler.h>

#include <geekos/vmm_debug.h>


/* TEMPORARY BECAUSE SVM IS WEIRD */
#include <geekos/tss.h>
/* ** */

extern struct vmm_os_hooks * os_hooks;

extern uint_t cpuid_ecx(uint_t op);
extern uint_t cpuid_edx(uint_t op);
extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte);
extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
extern uint_t launch_svm(vmcb_t * vmcb_addr);
extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);

extern uint_t Get_CR3();

extern void GetGDTR(void * gdt);
extern void GetIDTR(void * idt);

extern void DisableInts();

/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int is_svm_capable() {
  uint_t ret = cpuid_ecx(CPUID_FEATURE_IDS);
  uint_t vm_cr_low = 0, vm_cr_high = 0;

  if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
    PrintDebug("SVM Not Available\n");
    return 0;
  }

  Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

  // The nested paging flag lives in EDX of the SVM feature leaf, not in the
  // CPUID_FEATURE_IDS ECX value read above, and a clear bit means "not supported"
  if ((cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS) & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
    PrintDebug("Nested Paging not supported\n");
  }

  if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
    return 1;
  }

  ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);

  if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
    PrintDebug("SVM BIOS Disabled, not unlockable\n");
  } else {
    PrintDebug("SVM is locked with a key\n");
  }

  return 0;
}


void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
  reg_ex_t msr;
  void * host_state;

  // Enable SVM on the CPU
  Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
  msr.e_reg.low |= EFER_MSR_svm_enable;
  Set_MSR(EFER_MSR, 0, msr.e_reg.low);

  PrintDebug("SVM Enabled\n");

  // Setup the host state save area
  host_state = os_hooks->allocate_pages(4);

  msr.e_reg.high = 0;
  msr.e_reg.low = (uint_t)host_state;

  PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
  Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

  // Setup the SVM specific vmm operations
  vmm_ops->init_guest = &init_svm_guest;
  vmm_ops->start_guest = &start_svm_guest;

  return;
}


int init_svm_guest(struct guest_info *info) {

  PrintDebug("Allocating VMCB\n");
  info->vmm_data = (void*)Allocate_VMCB();

  //PrintDebug("Generating Guest nested page tables\n");
  //  info->page_tables = NULL;
  //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
  //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
  //PrintDebugPageTables(info->page_tables);

  PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
  Init_VMCB((vmcb_t*)(info->vmm_data), *info);

  info->vm_regs.rbx = 0;
  info->vm_regs.rcx = 0;
  info->vm_regs.rdx = 0;
  info->vm_regs.rsi = 0;
  info->vm_regs.rdi = 0;
  info->vm_regs.rbp = 0;

  return 0;
}


// can we start a kernel thread here...
int start_svm_guest(struct guest_info *info) {

  PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
  //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

  while (1) {

    safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
    //launch_svm((vmcb_t*)(info->vmm_data));
    PrintDebug("SVM Returned\n");

    if (handle_svm_exit(info) != 0) {
      break;
    }
  }
  return 0;
}


vmcb_t * Allocate_VMCB() {
  vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);

  memset(vmcb_page, 0, 4096);

  return vmcb_page;
}


void Init_VMCB_Real(vmcb_t * vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;

  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;
  guest_state->cr0 = 0x60000010;

  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  guest_state->cs.selector = 0xf000;
  guest_state->cs.limit = 0xffff;
  guest_state->cs.base = 0xffff0000;
  guest_state->cs.attrib.raw = 0x9a;

  struct vmcb_selector * segregs[] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 0x0000;
    seg->base = 0xffff0000;
    seg->attrib.raw = 0x9b;
    seg->limit = 0xffff;
  }

  /* Set GPRs */
  /*
    EDX == 0xfxx
    EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
  */

  guest_state->gdtr.base = 0;
  guest_state->gdtr.limit = 0xffff;
  guest_state->gdtr.attrib.raw = 0x0;

  guest_state->idtr.base = 0;
  guest_state->idtr.limit = 0xffff;
  guest_state->idtr.attrib.raw = 0x0;

  guest_state->ldtr.base = 0;
  guest_state->ldtr.limit = 0xffff;
  guest_state->ldtr.attrib.raw = 0x82;

  guest_state->tr.base = 0;
  guest_state->tr.limit = 0xffff;
  guest_state->tr.attrib.raw = 0x83;

  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;

    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);

    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //    memset((uchar_t*)io_port_bitmap, 0xff, PAGE_SIZE * 2);
    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  ctrl_area->instrs.instrs.INTR = 1;

  // also determine if CPU supports nested paging

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shadow_page_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&(vm_info.mem_map)) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shadow_page_state.shadow_cr3.r_reg;

    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}


void Init_VMCB(vmcb_t * vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;

  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;

  //ctrl_area->instrs.instrs.CR0 = 1;
  ctrl_area->cr_reads.crs.cr0 = 1;
  ctrl_area->cr_writes.crs.cr0 = 1;

  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  // guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;

  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  guest_state->cs.selector = 0x0000;
  guest_state->cs.limit = ~0u;
  guest_state->cs.base = guest_state->cs.selector << 4;
  guest_state->cs.attrib.raw = 0xf3;

  struct vmcb_selector * segregs[] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 0x0000;
    seg->base = seg->selector << 4;
    seg->attrib.raw = 0xf3;
    seg->limit = ~0u;
  }

  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;

    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);

    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  ctrl_area->instrs.instrs.INTR = 1;

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shadow_page_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&(vm_info.mem_map)) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shadow_page_state.shadow_cr3.r_reg;

    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}


void Init_VMCB_pe(vmcb_t *vmcb, guest_info_t vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;

  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;

  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector * segregs[] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }

  {
    /* JRL THIS HAS TO GO */

    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    guest_state->tr.limit = GetTR_Limit();
    guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }

  /* ** */

  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;

  //  guest_state->cpl = 0;

  // Setup exits

  ctrl_area->cr_writes.crs.cr4 = 1;

  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  ctrl_area->instrs.instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset takes a byte fill value; 0xff sets every bit so all ports are intercepted
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.instrs.INTR = 1;

  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);

    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);

    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;
  }

  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}