Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
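For example, to track a release branch (the branch name Release-1.2 below is illustrative; substitute whichever release you want):

  git checkout --track -b Release-1.2 origin/Release-1.2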


palacios/src/palacios/vmx.c (at commit 90ff06057432b23208f410ebef914170f1204272)
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Peter Dinda <pdinda@northwestern.edu>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = {0};


extern int v3_vmx_exit_handler();
extern int v3_vmx_vmlaunch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

#if 0
// For the 32 bit reserved bit fields
// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
    v3_msr_t mask_msr;

    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);

    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);

    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);

    val |= mask_msr.lo;
    val |= mask_msr.hi;

    return val;
}



static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
    v3_msr_t msr0, msr1;
    addr_t msr0_val, msr1_val;

    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);

    v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
    v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);

    // This generates a mask that is the natural bit width of the CPU
    msr0_val = msr0.value;
    msr1_val = msr1.value;

    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);

    val |= msr0_val;
    val |= msr1_val;

    return val;
}



#endif

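/* Intel requires the first 32 bits of a VMCS region to hold the VMCS
 * revision identifier reported by the IA32_VMX_BASIC MSR; allocate_vmcs()
 * below stamps that identifier into every region it hands out (the same
 * layout is reused for the per-CPU VMXON region). */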
static addr_t allocate_vmcs() {
    reg_ex_t msr;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));

    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


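/* Guest setup: VMCLEAR initializes the new VMCS region and VMPTRLD makes
 * it the current VMCS on this core, so the VMWRITEs issued below through
 * check_vmcs_write() target it. */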
static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
    struct vmx_data * vmx_info = NULL;
    int vmx_ret = 0;

    v3_pre_config_guest(info, config_ptr);

    vmx_info = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_info);

    PrintDebug("Allocating VMCS\n");
    vmx_info->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_info->vmcs_ptr_phys));

    info->vmm_data = vmx_info;

    PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_info->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_info->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1;
    }

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_info->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;

    /* Scratch buffer for SGDT/SIDT/STR: the 16-bit field receives the
     * table limit for SGDT/SIDT and the task register selector for STR. */
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;


    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_info->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_info->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_info->host_state.tr.selector = tmp_seg.selector;

    /* The GDT index is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else
                    (0)
#endif
                    );

    vmx_info->host_state.tr.base = tmp_seg.base;



    /********** Setup VMX Control Fields from MSRs ***********/
    /* Setup IO and MSR maps */
    v3_init_vmx_io_map(info);
    v3_init_vmx_msr_map(info);

    struct v3_msr tmp_msr;

    v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    /* Enable exiting on external interrupts and NMIs */
    vmx_info->pin_ctrls.value = tmp_msr.lo;
    vmx_info->pin_ctrls.nmi_exit = 1;
    vmx_info->pin_ctrls.ext_int_exit = 1;

    v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    vmx_info->pri_proc_ctrls.value = tmp_msr.lo;
    vmx_info->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_info->pri_proc_ctrls.hlt_exit = 1;
    vmx_info->pri_proc_ctrls.invlpg_exit = 1;
    vmx_info->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_info->pri_proc_ctrls.pause_exit = 1;

    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
            (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));

    v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_info->exit_ctrls.value = tmp_msr.lo;
    vmx_info->exit_ctrls.host_64_on = 1;

    if ((vmx_info->exit_ctrls.save_efer == 1) || (vmx_info->exit_ctrls.ld_efer == 1)) {
        vmx_info->ia32e_avail = 1;
    }

    v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_info->entry_ctrls.value = tmp_msr.lo;

    {
        struct vmx_exception_bitmap excp_bmap;
        excp_bmap.value = 0;

        excp_bmap.pf = 1;

        vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
    }
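
    /* VMXAssist (borrowed from Xen) runs inside the guest to emulate
     * real mode, which VMX hardware without unrestricted-guest support
     * cannot execute directly; the guest therefore begins execution at
     * the VMXAssist entry point (0xd0000). */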
    /******* Setup VMXAssist guest state ***********/

    info->rip = 0xd0000;
    info->vm_regs.rsp = 0x80000;

    struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
    flags->rsvd1 = 1;

    /* Print Control MSRs */
    v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);


#define GUEST_CR0 0x80000031
#define GUEST_CR4 0x00002000
    info->ctrl_regs.cr0 = GUEST_CR0;
    info->ctrl_regs.cr4 = GUEST_CR4;

    ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;

    /* Setup paging */
    if (info->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(info) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000


        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG));
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);

        info->ctrl_regs.cr3 = info->direct_map_pt;

        // vmx_info->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_info->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_info->pri_proc_ctrls.cr3_str_exit = 1;
    }

    // Setup segment registers
    {
        struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);

        int i;

        for (i = 0; i < 10; i++) {
            seg_reg[i].selector = 3 << 3;
            seg_reg[i].limit = 0xffff;
            seg_reg[i].base = 0x0;
        }

        info->segments.cs.selector = 2 << 3;

        /* Set only the segment registers */
        for (i = 0; i < 6; i++) {
            seg_reg[i].limit = 0xfffff;
            seg_reg[i].granularity = 1;
            seg_reg[i].type = 3;
            seg_reg[i].system = 1;
            seg_reg[i].dpl = 0;
            seg_reg[i].present = 1;
            seg_reg[i].db = 1;
        }

        info->segments.cs.type = 0xb;

        info->segments.ldtr.selector = 0x20;
        info->segments.ldtr.type = 2;
        info->segments.ldtr.system = 0;
        info->segments.ldtr.present = 1;
        info->segments.ldtr.granularity = 0;


        /************* Map in GDT and vmxassist *************/

        uint64_t  gdt[] __attribute__ ((aligned(32))) = {
            0x0000000000000000ULL,              /* 0x00: reserved */
            0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
            //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
            0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
            0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
            0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
        };

#define VMXASSIST_GDT   0x10000
        addr_t vmxassist_gdt = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
            PrintError("Could not find VMXASSIST GDT destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);

        info->segments.gdtr.base = VMXASSIST_GDT;

#define VMXASSIST_TSS   0x40000
        uint64_t vmxassist_tss = VMXASSIST_TSS;
        gdt[0x08 / sizeof(gdt[0])] |=
            ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
            ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
            ((vmxassist_tss & 0x0000FFFF) << (16)) |
            (8392 - 1);

        info->segments.tr.selector = 0x08;
        info->segments.tr.base = vmxassist_tss;

        //info->segments.tr.type = 0x9;
        info->segments.tr.type = 0x3;
        info->segments.tr.system = 0;
        info->segments.tr.present = 1;
        info->segments.tr.granularity = 0;
    }

    // setup VMXASSIST
    {
#define VMXASSIST_START 0x000d0000
        extern uint8_t v3_vmxassist_start[];
        extern uint8_t v3_vmxassist_end[];
        addr_t vmxassist_dst = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
            PrintError("Could not find VMXASSIST destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
    }

    /*** Write all the info to the VMCS ***/

#define DEBUGCTL_MSR 0x1d9
    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);

    info->dbg_regs.dr7 = 0x400;

    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);

    if (v3_update_vmcs_ctrl_fields(info)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(info)) {
        PrintError("Could not write host state\n");
        return -1;
    }


    if (v3_update_vmcs_guest_state(info) != VMX_SUCCESS) {
        PrintError("Writing guest state failed!\n");
        return -1;
    }

    v3_print_vmcs();

    vmx_info->state = VMXASSIST_DISABLED;

    v3_post_config_guest(info, config_ptr);

    return 0;
}


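/* Launch the guest. Reaching the end of this function means the VM is no
 * longer running, so it is reported as an error. */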
static int start_vmx_guest(struct guest_info * info) {
    uint32_t error = 0;
    int ret = 0;

    PrintDebug("Attempting VMLAUNCH\n");

    info->run_state = VM_RUNNING;

    rdtscll(info->time_state.cached_host_tsc);

    ret = v3_vmx_vmlaunch(&(info->vm_regs), info, &(info->ctrl_regs));

    if (ret != VMX_SUCCESS) {
        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMLAUNCH failed: %d\n", error);

        v3_print_vmcs();
    }

    PrintDebug("Returned from VMLAUNCH ret=%d\n", ret);

    return -1;
}


int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}

static int has_vmx_nested_paging() {
    // EPT support is not implemented yet, so never report nested paging
    return 0;
}


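/* Per-CPU VMX initialization: CR4.VMXE (bit 13) and CR0.NE (bit 5) must
 * both be set before VMXON can enter VMX root operation on this core. */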
void v3_init_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    struct v3_msr tmp_msr;
    uint64_t ret = 0;

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    // Compute a CR4 value with the VMXE bit (bit 13) set
    __asm__ __volatile__ (
                          "movq %%cr4, %%rbx;"
                          "orq  $0x00002000, %%rbx;"
                          "movq %%rbx, %0;"
                          : "=m"(ret)
                          :
                          : "%rbx"
                          );

    // Commit the new CR4 only if every bit that VMX_CR4_FIXED0_MSR
    // requires to be 1 is set
    if ((~ret & tmp_msr.value) == 0) {
        __asm__ __volatile__ (
                              "movq %0, %%cr4;"
                              :
                              : "q"(ret)
                              );
    } else {
        PrintError("Invalid CR4 Settings!\n");
        return;
    }

    // Set CR0.NE (bit 5), which VMX operation requires
    __asm__ __volatile__ (
                          "movq %%cr0, %%rbx; "
                          "orq  $0x00000020,%%rbx; "
                          "movq %%rbx, %%cr0;"
                          :
                          :
                          : "%rbx"
                          );
    //
    // Should check and return Error here....


    // Setup VMXON Region
    host_vmcs_ptrs[cpu_id] = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }


    if (has_vmx_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_VMX_CPU;
    }

}


void v3_init_vmx_hooks(struct v3_ctrl_ops * vm_ops) {

    // Setup the VMX specific vmm operations
    vm_ops->init_guest = &init_vmx_guest;
    vm_ops->start_guest = &start_vmx_guest;
    vm_ops->has_nested_paging = &has_vmx_nested_paging;

}