Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


added memory conversions and copies for the guest/host contexts
[palacios.git] / palacios / src / geekos / svm.c
1 #include <geekos/svm.h>
2 #include <geekos/vmm.h>
3
4 #include <geekos/vmcb.h>
5 #include <geekos/vmm_mem.h>
6 #include <geekos/vmm_paging.h>
7 #include <geekos/svm_handler.h>
8
9 #include <geekos/vmm_debug.h>
10 #include <geekos/vm_guest_mem.h>
11
12
13 /* TEMPORARY BECAUSE SVM IS WEIRD */
14 #include <geekos/tss.h>
15 /* ** */
16
17 extern struct vmm_os_hooks * os_hooks;
18
19 extern uint_t cpuid_ecx(uint_t op);
20 extern uint_t cpuid_edx(uint_t op);
21 extern void Get_MSR(uint_t MSR, uint_t * high_byte, uint_t * low_byte); 
22 extern void Set_MSR(uint_t MSR, uint_t high_byte, uint_t low_byte);
23 extern uint_t launch_svm(vmcb_t * vmcb_addr);
24 extern void safe_svm_launch(vmcb_t * vmcb_addr, struct guest_gprs * gprs);
25
26 extern uint_t Get_CR3();
27
28 extern void GetGDTR(void * gdt);
29 extern void GetIDTR(void * idt);
30
31 extern void DisableInts();
32
33 /* Checks machine SVM capability */
34 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
35 int is_svm_capable() {
36   uint_t ret =  cpuid_ecx(CPUID_FEATURE_IDS);
37   uint_t vm_cr_low = 0, vm_cr_high = 0;
38
39
40   if ((ret & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
41     PrintDebug("SVM Not Available\n");
42     return 0;
43   } 
44
45   Get_MSR(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
46
47   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 1) {
48     PrintDebug("Nested Paging not supported\n");
49   }
50
51   if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 0) {
52     return 1;
53   }
54
55   ret = cpuid_edx(CPUID_SVM_REV_AND_FEATURE_IDS);
56
57   if ((ret & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
58     PrintDebug("SVM BIOS Disabled, not unlockable\n");
59   } else {
60     PrintDebug("SVM is locked with a key\n");
61   }
62
63   return 0;
64 }
65
66
67
68 void Init_SVM(struct vmm_ctrl_ops * vmm_ops) {
69   reg_ex_t msr;
70   void * host_state;
71
72
73   // Enable SVM on the CPU
74   Get_MSR(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
75   msr.e_reg.low |= EFER_MSR_svm_enable;
76   Set_MSR(EFER_MSR, 0, msr.e_reg.low);
77   
78   PrintDebug("SVM Enabled\n");
79
80
81   // Setup the host state save area
82   host_state = os_hooks->allocate_pages(4);
83   
84   msr.e_reg.high = 0;
85   msr.e_reg.low = (uint_t)host_state;
86
87
88   PrintDebug("Host State being saved at %x\n", (uint_t)host_state);
89   Set_MSR(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
90
91
92
93   // Setup the SVM specific vmm operations
94   vmm_ops->init_guest = &init_svm_guest;
95   vmm_ops->start_guest = &start_svm_guest;
96
97
98   return;
99 }
100
101
102 int init_svm_guest(struct guest_info *info) {
103  
104   PrintDebug("Allocating VMCB\n");
105   info->vmm_data = (void*)Allocate_VMCB();
106
107
108   //PrintDebug("Generating Guest nested page tables\n");
109   //  info->page_tables = NULL;
110   //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list));
111   //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list));
112   //PrintDebugPageTables(info->page_tables);
113
114   
115
116   PrintDebug("Initializing VMCB (addr=%x)\n", info->vmm_data);
117   Init_VMCB((vmcb_t*)(info->vmm_data), *info);
118   
119   
120   info->vm_regs.rbx = 0;
121   info->vm_regs.rcx = 0;
122   info->vm_regs.rdx = 0;
123   info->vm_regs.rsi = 0;
124   info->vm_regs.rdi = 0;
125   info->vm_regs.rbp = 0;
126
127   return 0;
128 }
129
130
131 // can we start a kernel thread here...
132 int start_svm_guest(struct guest_info *info) {
133
134
135
136   PrintDebug("Launching SVM VM (vmcb=%x)\n", info->vmm_data);
137   //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
138
139   while (1) {
140
141     safe_svm_launch((vmcb_t*)(info->vmm_data), &(info->vm_regs));
142     //launch_svm((vmcb_t*)(info->vmm_data));
143     PrintDebug("SVM Returned\n");
144
145     if (handle_svm_exit(info) != 0) {
146       break;
147     }
148   }
149   return 0;
150 }
151
152
153
154 vmcb_t * Allocate_VMCB() {
155   vmcb_t * vmcb_page = (vmcb_t*)os_hooks->allocate_pages(1);
156
157
158   memset(vmcb_page, 0, 4096);
159
160   return vmcb_page;
161 }
162
163
/*
 * Initialize a VMCB for a guest that starts at the x86 reset vector:
 * CS selector 0xf000 with base 0xffff0000, so execution begins near the
 * top of the 4GB address space, as after a hardware reset.
 *
 * Sets exception and instruction intercepts, builds the I/O permission
 * bitmap from the guest's hooked ports, and configures shadow paging
 * when requested.
 *
 * NOTE(review): vm_info is passed by value, so the shadow_cr3 update
 * made below under SHADOW_PAGING is not visible to the caller -- confirm
 * the caller re-reads it from guest_state->cr3.
 */
void Init_VMCB_Real(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;


  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;




  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;  // always intercept VMRUN from the guest
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;  // must be non-zero; ASID 0 is the host (AMD spec)
  // CD | NW | ET | bit 4 set, PE and PG clear: the architectural
  // post-reset CR0 value (real mode, caching disabled).
  guest_state->cr0 = 0x60000010;


  // Intercept the exceptions the VMM needs to observe.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // Reset-vector code segment.
  guest_state->cs.selector = 0xf000;
  guest_state->cs.limit=0xffff;
  guest_state->cs.base =  0xffff0000;
  guest_state->cs.attrib.raw = 0x9a;  // present, code, execute/read

  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    // NOTE(review): data segments get base 0xffff0000 with selector 0;
    // real-mode translation of selector 0 would give base 0 -- confirm
    // this mirror-of-CS base is intentional.
    seg->base = 0xffff0000;
    seg->attrib.raw = 0x9b;
    seg->limit = 0xffff;
  }
  
  /* Set GPRs */
  /*
    EDX == 0xfxx
    EAX, EBX, ECX, ESI, EDI, EBP, ESP == 0x0
  */

  // Descriptor tables: zero base, 64KB limit (real-mode defaults).
  guest_state->gdtr.base = 0;
  guest_state->gdtr.limit = 0xffff;
  guest_state->gdtr.attrib.raw = 0x0;

  guest_state->idtr.base = 0;
  guest_state->idtr.limit = 0xffff;
  guest_state->idtr.attrib.raw = 0x0;

  guest_state->ldtr.base = 0;
  guest_state->ldtr.limit = 0xffff;
  guest_state->ldtr.attrib.raw = 0x82;  // present LDT descriptor type

  guest_state->tr.base = 0;
  guest_state->tr.limit = 0xffff;
  guest_state->tr.attrib.raw = 0x83;    // busy TSS descriptor type




  // Build the I/O permission bitmap: one bit per port; a set bit causes
  // a #VMEXIT when the guest touches that port.
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }

    //    memset((uchar_t*)io_port_bitmap, 0xff, PAGE_SIZE * 2);
    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Intercept physical interrupts so the host stays in control.
  ctrl_area->instrs.instrs.INTR = 1;

  // also determine if CPU supports nested paging

  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Intercept CR3/CR0 accesses so guest paging changes are visible.
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;
    ctrl_area->cr_reads.crs.cr0 = 1;
    ctrl_area->cr_writes.crs.cr0 = 1;

    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

        
    guest_state->g_pat = 0x7040600070406ULL;

    // Enable paging (CR0.PG) now that the shadow tables are installed.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }

}
308
309
/*
 * Initialize a VMCB for a guest starting in real mode at a low-memory
 * address: CS selector 0 with base = selector << 4 (the standard
 * real-mode segment translation), RIP/RSP taken from vm_info.
 *
 * Sets exception and instruction intercepts, builds the I/O permission
 * bitmap from the guest's hooked ports, and configures shadow paging
 * when requested.
 *
 * NOTE(review): vm_info is passed by value, so the shadow_cr3 update
 * made below under SHADOW_PAGING is not visible to the caller -- confirm
 * the caller re-reads it from guest_state->cr3.
 */
void Init_VMCB(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i;


  guest_state->rsp = vm_info.rsp;
  guest_state->rip = vm_info.rip;


  // Intercept all CR0 reads/writes so paging-mode changes are visible.
  //ctrl_area->instrs.instrs.CR0 = 1;
  ctrl_area->cr_reads.crs.cr0 = 1;
  ctrl_area->cr_writes.crs.cr0 = 1;

  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.instrs.VMRUN = 1;  // always intercept VMRUN from the guest
  // guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;  // must be non-zero; ASID 0 is the host (AMD spec)


  // Intercept the exceptions the VMM needs to observe.
  ctrl_area->exceptions.ex_names.de = 1;
  ctrl_area->exceptions.ex_names.df = 1;
  ctrl_area->exceptions.ex_names.pf = 1;
  ctrl_area->exceptions.ex_names.ts = 1;
  ctrl_area->exceptions.ex_names.ss = 1;
  ctrl_area->exceptions.ex_names.ac = 1;
  ctrl_area->exceptions.ex_names.mc = 1;
  ctrl_area->exceptions.ex_names.gp = 1;
  ctrl_area->exceptions.ex_names.ud = 1;
  ctrl_area->exceptions.ex_names.np = 1;
  ctrl_area->exceptions.ex_names.of = 1;
  ctrl_area->exceptions.ex_names.nmi = 1;

  // Real-mode style start: base = selector << 4 (selector 0 => base 0).
  // NOTE(review): limit ~0u with attrib 0xf3 looks like protected-mode
  // style values for a real-mode guest -- verify against the VMCB
  // segment attribute format in the AMD manual.
  guest_state->cs.selector = 0x0000;
  guest_state->cs.limit=~0u;
  guest_state->cs.base = guest_state->cs.selector<<4;
  guest_state->cs.attrib.raw = 0xf3;

  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 0x0000;
    seg->base = seg->selector << 4;  // real-mode translation, => base 0
    seg->attrib.raw = 0xf3;
    seg->limit = ~0u;
  }
  
  // Build the I/O permission bitmap: one bit per port; a set bit causes
  // a #VMEXIT when the guest touches that port.
  if (vm_info.io_map.num_ports > 0) {
    vmm_io_hook_t * iter;
    addr_t io_port_bitmap;
    
    io_port_bitmap = (addr_t)os_hooks->allocate_pages(3);
    memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
    
    ctrl_area->IOPM_BASE_PA = io_port_bitmap;

    //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);

    FOREACH_IO_HOOK(vm_info.io_map, iter) {
      ushort_t port = iter->port;
      uchar_t * bitmap = (uchar_t *)io_port_bitmap;

      bitmap += (port / 8);
      PrintDebug("Setting Bit in block %x\n", bitmap);
      *bitmap |= 1 << (port % 8);
    }


    //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);

    ctrl_area->instrs.instrs.IOIO_PROT = 1;
  }

  // Intercept physical interrupts so the host stays in control.
  ctrl_area->instrs.instrs.INTR = 1;



  if (vm_info.page_mode == SHADOW_PAGING) {
    PrintDebug("Creating initial shadow page table\n");
    vm_info.shdw_pg_state.shadow_cr3.e_reg.low |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
    PrintDebug("Created\n");

    guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3.r_reg;

    // Intercept CR3 accesses so guest page-table switches are visible.
    ctrl_area->cr_reads.crs.cr3 = 1;
    ctrl_area->cr_writes.crs.cr3 = 1;


    ctrl_area->instrs.instrs.INVLPG = 1;
    ctrl_area->instrs.instrs.INVLPGA = 1;

    guest_state->g_pat = 0x7040600070406ULL;

    // Enable paging (CR0.PG) now that the shadow tables are installed.
    guest_state->cr0 |= 0x80000000;
  } else if (vm_info.page_mode == NESTED_PAGING) {
    // Flush the TLB on entries/exits
    //ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    //ctrl_area->NP_ENABLE = 1;

    //PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    //    ctrl_area->N_CR3 = ((addr_t)vm_info.page_tables);
    // ctrl_area->N_CR3 = (addr_t)(vm_info.page_tables);

    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    //    guest_state->g_pat = 0x7040600070406ULL;
  }



}
429
430 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
431   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
432   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
433   uint_t i = 0;
434
435
436   guest_state->rsp = vm_info.rsp;
437   guest_state->rip = vm_info.rip;
438
439
440   /* I pretty much just gutted this from TVMM */
441   /* Note: That means its probably wrong */
442
443   // set the segment registers to mirror ours
444   guest_state->cs.selector = 1<<3;
445   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
446   guest_state->cs.attrib.fields.S = 1;
447   guest_state->cs.attrib.fields.P = 1;
448   guest_state->cs.attrib.fields.db = 1;
449   guest_state->cs.attrib.fields.G = 1;
450   guest_state->cs.limit = 0xfffff;
451   guest_state->cs.base = 0;
452   
453   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
454   for ( i = 0; segregs[i] != NULL; i++) {
455     struct vmcb_selector * seg = segregs[i];
456     
457     seg->selector = 2<<3;
458     seg->attrib.fields.type = 0x2; // Data Segment+read/write
459     seg->attrib.fields.S = 1;
460     seg->attrib.fields.P = 1;
461     seg->attrib.fields.db = 1;
462     seg->attrib.fields.G = 1;
463     seg->limit = 0xfffff;
464     seg->base = 0;
465   }
466
467
468   {
469     /* JRL THIS HAS TO GO */
470     
471     guest_state->tr.selector = GetTR_Selector();
472     guest_state->tr.attrib.fields.type = 0x9; 
473     guest_state->tr.attrib.fields.P = 1;
474     guest_state->tr.limit = GetTR_Limit();
475     guest_state->tr.base = GetTR_Base();// - 0x2000;
476     /* ** */
477   }
478
479
480   /* ** */
481
482
483   guest_state->efer |= EFER_MSR_svm_enable;
484   guest_state->rflags = 0x00000002; // The reserved bit is always 1
485   ctrl_area->svm_instrs.instrs.VMRUN = 1;
486   guest_state->cr0 = 0x00000001;    // PE 
487   ctrl_area->guest_ASID = 1;
488
489
490   //  guest_state->cpl = 0;
491
492
493
494   // Setup exits
495
496   ctrl_area->cr_writes.crs.cr4 = 1;
497   
498   ctrl_area->exceptions.ex_names.de = 1;
499   ctrl_area->exceptions.ex_names.df = 1;
500   ctrl_area->exceptions.ex_names.pf = 1;
501   ctrl_area->exceptions.ex_names.ts = 1;
502   ctrl_area->exceptions.ex_names.ss = 1;
503   ctrl_area->exceptions.ex_names.ac = 1;
504   ctrl_area->exceptions.ex_names.mc = 1;
505   ctrl_area->exceptions.ex_names.gp = 1;
506   ctrl_area->exceptions.ex_names.ud = 1;
507   ctrl_area->exceptions.ex_names.np = 1;
508   ctrl_area->exceptions.ex_names.of = 1;
509   ctrl_area->exceptions.ex_names.nmi = 1;
510
511   
512
513   ctrl_area->instrs.instrs.IOIO_PROT = 1;
514   ctrl_area->IOPM_BASE_PA = (uint_t)os_hooks->allocate_pages(3);
515   
516   {
517     reg_ex_t tmp_reg;
518     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
519     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
520   }
521
522   ctrl_area->instrs.instrs.INTR = 1;
523
524   
525   {
526     char gdt_buf[6];
527     char idt_buf[6];
528
529     memset(gdt_buf, 0, 6);
530     memset(idt_buf, 0, 6);
531
532
533     uint_t gdt_base, idt_base;
534     ushort_t gdt_limit, idt_limit;
535     
536     GetGDTR(gdt_buf);
537     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
538     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
539     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
540
541     GetIDTR(idt_buf);
542     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
543     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
544     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
545
546
547     // gdt_base -= 0x2000;
548     //idt_base -= 0x2000;
549
550     guest_state->gdtr.base = gdt_base;
551     guest_state->gdtr.limit = gdt_limit;
552     guest_state->idtr.base = idt_base;
553     guest_state->idtr.limit = idt_limit;
554
555
556   }
557   
558   
559   // also determine if CPU supports nested paging
560   /*
561   if (vm_info.page_tables) {
562     //   if (0) {
563     // Flush the TLB on entries/exits
564     ctrl_area->TLB_CONTROL = 1;
565
566     // Enable Nested Paging
567     ctrl_area->NP_ENABLE = 1;
568
569     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
570
571         // Set the Nested Page Table pointer
572     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
573
574
575     //   ctrl_area->N_CR3 = Get_CR3();
576     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
577
578     guest_state->g_pat = 0x7040600070406ULL;
579
580     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
581     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
582     // Enable Paging
583     //    guest_state->cr0 |= 0x80000000;
584   }
585   */
586
587 }
588
589