Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


code restructuring
[palacios.git] / palacios / src / palacios / svm.c
1 #include <palacios/svm.h>
2 #include <palacios/vmm.h>
3
4 #include <palacios/vmcb.h>
5 #include <palacios/vmm_mem.h>
6 #include <palacios/vmm_paging.h>
7 #include <palacios/svm_handler.h>
8
9 #include <palacios/vmm_debug.h>
10 #include <palacios/vm_guest_mem.h>
11
12
13 /* TEMPORARY BECAUSE SVM IS WEIRD */
14 //#include <palacios/tss.h>
15 /* ** */
16
17 extern struct vmm_os_hooks * os_hooks;
18
19 extern uint_t cpuid_ecx(uint_t op);
20 extern uint_t cpuid_edx(uint_t op);
21 extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte); 
22 extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
23 extern uint_t launch_svm(vmcb_t * vmcb_addr);
24 extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);
25
26 extern uint_t Get_CR3();
27
28 extern void GetGDTR(void * gdt);
29 extern void GetIDTR(void * idt);
30
31 extern void DisableInts();
32
33 /* Checks machine SVM capability */
34 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
35 int is_svm_capable() {
36   uint_t ret =  cpuid_ecx(CPUID_FEATURE_IDS);
37   uint_t vm_cr_low = 0, vm_cr_high = 0;
38
39
40   if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
41     PrintDebug("SVM Not Available\n");
42     return 0;
43   } 
44
45   Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
46
47   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 1) {
48     PrintDebug("Nested Paging not supported\n");
49   }
50
51   if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
52     return 1;
53   }
54
55   ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);
56
57   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
58     PrintDebug("SVM BIOS Disabled, not unlockable\n");
59   } else {
60     PrintDebug("SVM is locked with a key\n");
61   }
62
63   return 0;
64 }
65
66
67
68 void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
69   reg_ex_t msr;
70   void * host_state;
71
72
73   // Enable SVM on the CPU
74   Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
75   msr.e_reg.low |= EFER_MSR_svm_enable;
76   Set_MSR(EFER_MSR, 0, msr.e_reg.low);
77   
78   PrintDebug("SVM Enabled\n");
79
80
81   // Setup the host state save area
82   host_state = os_hooks->allocate_pages(4);
83   
84   msr.e_reg.high = 0;
85   msr.e_reg.low = (uint_t)host_state;
86
87
88   PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
89   Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
90
91
92
93   // Setup the SVM specific vmm operations
94   vmm_ops->init_guest = &init_svm_guest;
95   vmm_ops->start_guest = &start_svm_guest;
96
97
98   return;
99 }
100
101
102 int init_svm_guest(struct guest_info *info) {
103  
104   PrintDebug("Allocating VMCB\n");
105   info->vmm_data = (void*)Allocate_VMCB();
106
107
108   //PrintDebug("Generating Guest nested page tables\n");
109   //  info->page_tables = NULL;
110   //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
111   //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
112   //PrintDebugPageTables(info->page_tables);
113
114
115   PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
116   Init_VMCB((vmcb_t*)(info->vmm_data), *info);
117   
118   //  info->rip = 0;
119
120   info->vm_regs.rdi = 0;
121   info->vm_regs.rsi = 0;
122   info->vm_regs.rbp = 0;
123   info->vm_regs.rsp = 0;
124   info->vm_regs.rbx = 0;
125   info->vm_regs.rdx = 0;
126   info->vm_regs.rcx = 0;
127   info->vm_regs.rax = 0;
128   
129   return 0;
130 }
131
132
133 // can we start a kernel thread here...
134 int start_svm_guest(struct guest_info *info) {
135
136
137
138   PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
139   //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
140
141   while (1) {
142
143     PrintDebug("SVM Launch Args (vmcb=%x), (info=%x), (vm_regs=%x)\n", info->vmm_data,  &(info->vm_regs));
144     PrintDebug("Launching to RIP: %x\n", info->rip);
145     safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
146     //launch_svm((vmcb_t*)(info->vmm_data));
147     PrintDebug("SVM Returned\n");
148
149     if (handle_svm_exit(info) != 0) {
150       // handle exit code....
151       break;
152     }
153   }
154   return 0;
155 }
156
157
158
159 vmcb_t * Allocate_VMCB() {
160   vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);
161
162
163   memset(vmcb_page, 0, 4096);
164
165   return vmcb_page;
166 }
167
168
169 void Init_VMCB_Real(vmcb_t * vmcb, struct guest_info vm_info) {
170   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
171   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
172   uint_t i;
173
174
175   guest_state->rsp = vm_info.vm_regs.rsp;
176   guest_state->rip = vm_info.rip;
177
178
179   guest_state->efer |= EFER_MSR_svm_enable;
180   guest_state->rflags = 0x00000002; // The reserved bit is always 1
181   ctrl_area->svm_instrs.instrs.VMRUN = 1;
182   ctrl_area->guest_ASID = 1;
183   guest_state->cr0 = 0x60000010;
184
185
186   ctrl_area->exceptions.ex_names.de = 1;
187   ctrl_area->exceptions.ex_names.df = 1;
188   ctrl_area->exceptions.ex_names.pf = 1;
189   ctrl_area->exceptions.ex_names.ts = 1;
190   ctrl_area->exceptions.ex_names.ss = 1;
191   ctrl_area->exceptions.ex_names.ac = 1;
192   ctrl_area->exceptions.ex_names.mc = 1;
193   ctrl_area->exceptions.ex_names.gp = 1;
194   ctrl_area->exceptions.ex_names.ud = 1;
195   ctrl_area->exceptions.ex_names.np = 1;
196   ctrl_area->exceptions.ex_names.of = 1;
197   ctrl_area->exceptions.ex_names.nmi = 1;
198
199   guest_state->cs.selector = 0xf000;
200   guest_state->cs.limit=0xffff;
201   guest_state->cs.base =  0xffff0000;
202   guest_state->cs.attrib.raw = 0x9a;
203
204   
205   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
206   for ( i = 0; segregs[i] != NULL; i++) {
207     struct vmcb_selector * seg = segregs[i];
208     
209     seg->selector = 0x0000;
210     seg->base = 0xffff0000;
211     seg->attrib.raw = 0x9b;
212     seg->limit = 0xffff;
213   }
214   
215   /* Set GPRs */
216   /*
217     EDX == 0xfxx
218     EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
219   */
220
221   guest_state->gdtr.base = 0;
222   guest_state->gdtr.limit = 0xffff;
223   guest_state->gdtr.attrib.raw = 0x0;
224
225   guest_state->idtr.base = 0;
226   guest_state->idtr.limit = 0xffff;
227   guest_state->idtr.attrib.raw = 0x0;
228
229   guest_state->ldtr.base = 0;
230   guest_state->ldtr.limit = 0xffff;
231   guest_state->ldtr.attrib.raw = 0x82;
232
233   guest_state->tr.base = 0;
234   guest_state->tr.limit = 0xffff;
235   guest_state->tr.attrib.raw = 0x83;
236
237
238
239
240   if (vm_info.io_map.num_ports > 0) {
241     vmm_io_hook_t * iter;
242     addr_t io_port_bitmap;
243     
244     io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
245     memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
246     
247     ctrl_area->IOPM_BASE_PA = io_port_bitmap;
248
249     //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);
250
251     FOREACH_IO_HOOK(vm_info.io_map, iter) {
252       ushort_t port = iter->port;
253       uchar_t * bitmap = (uchar_t *)io_port_bitmap;
254
255       bitmap += (port / 8);
256       PrintDebug("Setting Bit in block %x\n", bitmap);
257       *bitmap |= 1 << (port % 8);
258     }
259
260     ctrl_area->instrs.instrs.IOIO_PROT = 1;
261   }
262
263   ctrl_area->instrs.instrs.INTR = 1;
264
265   // also determine if CPU supports nested paging
266
267   if (vm_info.page_mode == SHADOW_PAGING) {
268     PrintDebug("Creating initial shadow page table\n");
269     vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
270     PrintDebug("Created\n");
271
272     guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;
273
274     ctrl_area->cr_reads.crs.cr3 = 1;
275     ctrl_area->cr_writes.crs.cr3 = 1;
276     ctrl_area->cr_reads.crs.cr0 = 1;
277     ctrl_area->cr_writes.crs.cr0 = 1;
278
279     ctrl_area->instrs.instrs.INVLPG = 1;
280     ctrl_area->instrs.instrs.INVLPGA = 1;
281
282         
283     guest_state->g_pat = 0x7040600070406ULL;
284
285     vm_info.shdw_pg_state.guest_cr0.e_reg.low = guest_state->cr0;
286     guest_state->cr0 |= 0x80000000;
287   } else if (vm_info.page_mode == NESTED_PAGING) {
288     // Flush the TLB on entries/exits
289     //ctrl_area->TLB_CONTROL = 1;
290
291     // Enable Nested Paging
292     //ctrl_area->NP_ENABLE = 1;
293
294     //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
295
296         // Set the Nested Page Table pointer
297     //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
298     // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);
299
300     //   ctrl_area->N_CR3 = Get_CR3();
301     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
302
303     //    guest_state->g_pat = 0x7040600070406ULL;
304   }
305
306 }
307
308
309 void Init_VMCB(vmcb_t * vmcb, struct guest_info vm_info) {
310   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
311   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
312   uint_t i;
313
314
315   guest_state->rsp = vm_info.vm_regs.rsp;
316   guest_state->rip = vm_info.rip;
317
318
319   //ctrl_area->instrs.instrs.CR0 = 1;
320   ctrl_area->cr_reads.crs.cr0 = 1;
321   ctrl_area->cr_writes.crs.cr0 = 1;
322
323   guest_state->efer |= EFER_MSR_svm_enable;
324   guest_state->rflags = 0x00000002; // The reserved bit is always 1
325   ctrl_area->svm_instrs.instrs.VMRUN = 1;
326   // guest_state->cr0 = 0x00000001;    // PE 
327   ctrl_area->guest_ASID = 1;
328
329
330   ctrl_area->exceptions.ex_names.de = 1;
331   ctrl_area->exceptions.ex_names.df = 1;
332   ctrl_area->exceptions.ex_names.pf = 1;
333   ctrl_area->exceptions.ex_names.ts = 1;
334   ctrl_area->exceptions.ex_names.ss = 1;
335   ctrl_area->exceptions.ex_names.ac = 1;
336   ctrl_area->exceptions.ex_names.mc = 1;
337   ctrl_area->exceptions.ex_names.gp = 1;
338   ctrl_area->exceptions.ex_names.ud = 1;
339   ctrl_area->exceptions.ex_names.np = 1;
340   ctrl_area->exceptions.ex_names.of = 1;
341   ctrl_area->exceptions.ex_names.nmi = 1;
342
343   guest_state->cs.selector = 0x0000;
344   guest_state->cs.limit=~0u;
345   guest_state->cs.base = guest_state->cs.selector<<4;
346   guest_state->cs.attrib.raw = 0xf3;
347
348   
349   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
350   for ( i = 0; segregs[i] != NULL; i++) {
351     struct vmcb_selector * seg = segregs[i];
352     
353     seg->selector = 0x0000;
354     seg->base = seg->selector << 4;
355     seg->attrib.raw = 0xf3;
356     seg->limit = ~0u;
357   }
358   
359   if (vm_info.io_map.num_ports > 0) {
360     vmm_io_hook_t * iter;
361     addr_t io_port_bitmap;
362     
363     io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
364     memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
365     
366     ctrl_area->IOPM_BASE_PA = io_port_bitmap;
367
368     //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);
369
370     FOREACH_IO_HOOK(vm_info.io_map, iter) {
371       ushort_t port = iter->port;
372       uchar_t * bitmap = (uchar_t *)io_port_bitmap;
373
374       bitmap += (port / 8);
375       PrintDebug("Setting Bit in block %x\n", bitmap);
376       *bitmap |= 1 << (port % 8);
377     }
378
379
380     //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);
381
382     ctrl_area->instrs.instrs.IOIO_PROT = 1;
383   }
384
385   ctrl_area->instrs.instrs.INTR = 1;
386
387
388
389   if (vm_info.page_mode == SHADOW_PAGING) {
390     PrintDebug("Creating initial shadow page table\n");
391     vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
392     PrintDebug("Created\n");
393
394     guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;
395
396     ctrl_area->cr_reads.crs.cr3 = 1;
397     ctrl_area->cr_writes.crs.cr3 = 1;
398
399
400     ctrl_area->instrs.instrs.INVLPG = 1;
401     ctrl_area->instrs.instrs.INVLPGA = 1;
402
403     guest_state->g_pat = 0x7040600070406ULL;
404
405     guest_state->cr0 |= 0x80000000;
406   } else if (vm_info.page_mode == NESTED_PAGING) {
407     // Flush the TLB on entries/exits
408     //ctrl_area->TLB_CONTROL = 1;
409
410     // Enable Nested Paging
411     //ctrl_area->NP_ENABLE = 1;
412
413     //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
414
415         // Set the Nested Page Table pointer
416     //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
417     // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);
418
419     //   ctrl_area->N_CR3 = Get_CR3();
420     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
421
422     //    guest_state->g_pat = 0x7040600070406ULL;
423   }
424
425
426
427 }
428
429 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
430   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
431   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
432   uint_t i = 0;
433
434
435   guest_state->rsp = vm_info.vm_regs.rsp;
436   guest_state->rip = vm_info.rip;
437
438
439   /* I pretty much just gutted this from TVMM */
440   /* Note: That means its probably wrong */
441
442   // set the segment registers to mirror ours
443   guest_state->cs.selector = 1<<3;
444   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
445   guest_state->cs.attrib.fields.S = 1;
446   guest_state->cs.attrib.fields.P = 1;
447   guest_state->cs.attrib.fields.db = 1;
448   guest_state->cs.attrib.fields.G = 1;
449   guest_state->cs.limit = 0xfffff;
450   guest_state->cs.base = 0;
451   
452   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
453   for ( i = 0; segregs[i] != NULL; i++) {
454     struct vmcb_selector * seg = segregs[i];
455     
456     seg->selector = 2<<3;
457     seg->attrib.fields.type = 0x2; // Data Segment+read/write
458     seg->attrib.fields.S = 1;
459     seg->attrib.fields.P = 1;
460     seg->attrib.fields.db = 1;
461     seg->attrib.fields.G = 1;
462     seg->limit = 0xfffff;
463     seg->base = 0;
464   }
465
466
467   {
468     /* JRL THIS HAS TO GO */
469     
470     //    guest_state->tr.selector = GetTR_Selector();
471     guest_state->tr.attrib.fields.type = 0x9; 
472     guest_state->tr.attrib.fields.P = 1;
473     // guest_state->tr.limit = GetTR_Limit();
474     //guest_state->tr.base = GetTR_Base();// - 0x2000;
475     /* ** */
476   }
477
478
479   /* ** */
480
481
482   guest_state->efer |= EFER_MSR_svm_enable;
483   guest_state->rflags = 0x00000002; // The reserved bit is always 1
484   ctrl_area->svm_instrs.instrs.VMRUN = 1;
485   guest_state->cr0 = 0x00000001;    // PE 
486   ctrl_area->guest_ASID = 1;
487
488
489   //  guest_state->cpl = 0;
490
491
492
493   // Setup exits
494
495   ctrl_area->cr_writes.crs.cr4 = 1;
496   
497   ctrl_area->exceptions.ex_names.de = 1;
498   ctrl_area->exceptions.ex_names.df = 1;
499   ctrl_area->exceptions.ex_names.pf = 1;
500   ctrl_area->exceptions.ex_names.ts = 1;
501   ctrl_area->exceptions.ex_names.ss = 1;
502   ctrl_area->exceptions.ex_names.ac = 1;
503   ctrl_area->exceptions.ex_names.mc = 1;
504   ctrl_area->exceptions.ex_names.gp = 1;
505   ctrl_area->exceptions.ex_names.ud = 1;
506   ctrl_area->exceptions.ex_names.np = 1;
507   ctrl_area->exceptions.ex_names.of = 1;
508   ctrl_area->exceptions.ex_names.nmi = 1;
509
510   
511
512   ctrl_area->instrs.instrs.IOIO_PROT = 1;
513   ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);
514   
515   {
516     reg_ex_t tmp_reg;
517     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
518     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
519   }
520
521   ctrl_area->instrs.instrs.INTR = 1;
522
523   
524   {
525     char gdt_buf[6];
526     char idt_buf[6];
527
528     memset(gdt_buf, 0, 6);
529     memset(idt_buf, 0, 6);
530
531
532     uint_t gdt_base, idt_base;
533     ushort_t gdt_limit, idt_limit;
534     
535     GetGDTR(gdt_buf);
536     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
537     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
538     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
539
540     GetIDTR(idt_buf);
541     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
542     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
543     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
544
545
546     // gdt_base -= 0x2000;
547     //idt_base -= 0x2000;
548
549     guest_state->gdtr.base = gdt_base;
550     guest_state->gdtr.limit = gdt_limit;
551     guest_state->idtr.base = idt_base;
552     guest_state->idtr.limit = idt_limit;
553
554
555   }
556   
557   
558   // also determine if CPU supports nested paging
559   /*
560   if (vm_info.page_tables) {
561     //   if (0) {
562     // Flush the TLB on entries/exits
563     ctrl_area->TLB_CONTROL = 1;
564
565     // Enable Nested Paging
566     ctrl_area->NP_ENABLE = 1;
567
568     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
569
570         // Set the Nested Page Table pointer
571     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
572
573
574     //   ctrl_area->N_CR3 = Get_CR3();
575     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
576
577     guest_state->g_pat = 0x7040600070406ULL;
578
579     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
580     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
581     // Enable Paging
582     //    guest_state->cr0 |= 0x80000000;
583   }
584   */
585
586 }
587
588