Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
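For example, to list the remote branches and track one of them (the release branch name here is only an illustration; substitute one that git actually reports):

  git branch -r
  git checkout --track -b Release-1.0 origin/Release-1.0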


VMX is working for a 32-bit Linux kernel. It should also work for a 64-bit kernel...
palacios/src/palacios/vmx.c:
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Peter Dinda <pdinda@northwestern.edu>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

static addr_t vmxon_ptr_phys;
extern int v3_vmx_exit_handler();
extern int v3_vmx_vmlaunch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

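/* Wrapper around vmcs_write() that reports VMWRITE failures.
 * Returns 0 on success and 1 on error, so callers can OR the results
 * of a series of writes together and check once at the end. */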
static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field,val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

#if 0
// For the 32 bit reserved bit fields 
// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
    v3_msr_t mask_msr;

    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);

    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);

    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);

    val |= mask_msr.lo;
    val |= mask_msr.hi;
  
    return val;
}



static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
    v3_msr_t msr0, msr1;
    addr_t msr0_val, msr1_val;

    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);

    v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
    v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
  
    // This generates a mask that is the natural bit width of the CPU
    msr0_val = msr0.value;
    msr1_val = msr1.value;

    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);

    val |= msr0_val;
    val |= msr1_val;

    return val;
}



#endif

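/* Allocate one zeroed 4 KB page for use as a VMCS (or VMXON) region.
 * Per the Intel SDM, software must write the VMX revision identifier
 * (reported in the low bits of IA32_VMX_BASIC) into the first dword of
 * the region before handing it to VMPTRLD or VMXON. */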
static addr_t allocate_vmcs() {
    reg_ex_t msr;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    
    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
    PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


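/* Guest initialization: allocate and load a fresh VMCS, cache the host's
 * descriptor-table state, derive the VMX control fields from the capability
 * MSRs, and stage the VMXAssist environment (GDT, TSS, and the VMXAssist
 * image itself) in which the guest begins execution. */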
static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
    struct vmx_data * vmx_info = NULL;
    int vmx_ret = 0;

    v3_pre_config_guest(info, config_ptr);

    vmx_info = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_info);

    PrintDebug("Allocating VMCS\n");
    vmx_info->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_info->vmcs_ptr_phys));

    info->vmm_data = vmx_info;

    PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_info->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_info->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1;
    }

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_info->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;


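    /* SGDT and SIDT store a pseudo-descriptor: a 16-bit limit followed by
     * the base address. The packed struct above overlays that layout, so
     * 'selector' receives the limit and 'base' receives the base. STR, by
     * contrast, stores only the 16-bit task register selector. */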
    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_info->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_info->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_info->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_info->host_state.tr.base = tmp_seg.base;



    /********** Setup VMX Control Fields from MSRs ***********/
    /* Setup IO map */
    v3_init_vmx_io_map(info);
    v3_init_vmx_msr_map(info);

    struct v3_msr tmp_msr;

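    /* The VMX capability MSRs encode which control bits are permitted: the
     * low 32 bits give the allowed-0 settings (a 1 there means the control
     * bit must be 1) and the high 32 bits give the allowed-1 settings.
     * Seeding each control field with the MSR's low word therefore sets
     * every required bit before the optional exits are enabled. */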
    v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_info->pin_ctrls.value =  tmp_msr.lo;
    vmx_info->pin_ctrls.nmi_exit = 1;
    vmx_info->pin_ctrls.ext_int_exit = 1;

    v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    vmx_info->pri_proc_ctrls.value = tmp_msr.lo;
    vmx_info->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_info->pri_proc_ctrls.hlt_exit = 1;
    vmx_info->pri_proc_ctrls.invlpg_exit = 1;
    vmx_info->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_info->pri_proc_ctrls.pause_exit = 1;

    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB); 

    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));

    v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_info->exit_ctrls.value = tmp_msr.lo;
    vmx_info->exit_ctrls.host_64_on = 1;

    if ((vmx_info->exit_ctrls.save_efer == 1) || (vmx_info->exit_ctrls.ld_efer == 1)) {
        vmx_info->ia32e_avail = 1;
    }

    v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_info->entry_ctrls.value = tmp_msr.lo;

    {
        struct vmx_exception_bitmap excp_bmap;
        excp_bmap.value = 0;

        excp_bmap.pf = 1;

        vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
    }
    /******* Setup VMXAssist guest state ***********/

    info->rip = 0xd0000;
    info->vm_regs.rsp = 0x80000;

    struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
    flags->rsvd1 = 1;

    /* Print Control MSRs */
    v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);


#define GUEST_CR0 0x80000031
#define GUEST_CR4 0x00002000
    info->ctrl_regs.cr0 = GUEST_CR0;
    info->ctrl_regs.cr4 = GUEST_CR4;

    ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;

    /* Setup paging */
    if (info->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(info) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000


        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);

        info->ctrl_regs.cr3 = info->direct_map_pt;

        // vmx_info->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_info->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_info->pri_proc_ctrls.cr3_str_exit = 1;
    }

    // Setup segment registers
    {
        struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);

        int i;

        for (i = 0; i < 10; i++) {
            seg_reg[i].selector = 3 << 3;
            seg_reg[i].limit = 0xffff;
            seg_reg[i].base = 0x0;
        }

        info->segments.cs.selector = 2<<3;

        /* Set only the segment registers */
        for (i = 0; i < 6; i++) {
            seg_reg[i].limit = 0xfffff;
            seg_reg[i].granularity = 1;
            seg_reg[i].type = 3;
            seg_reg[i].system = 1;
            seg_reg[i].dpl = 0;
            seg_reg[i].present = 1;
            seg_reg[i].db = 1;
        }

        info->segments.cs.type = 0xb;

        info->segments.ldtr.selector = 0x20;
        info->segments.ldtr.type = 2;
        info->segments.ldtr.system = 0;
        info->segments.ldtr.present = 1;
        info->segments.ldtr.granularity = 0;


        /************* Map in GDT and vmxassist *************/

        uint64_t  gdt[] __attribute__ ((aligned(32))) = {
            0x0000000000000000ULL,              /* 0x00: reserved */
            0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
            //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
            0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
            0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
            0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
        };

#define VMXASSIST_GDT   0x10000
        addr_t vmxassist_gdt = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
            PrintError("Could not find VMXASSIST GDT destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);

        info->segments.gdtr.base = VMXASSIST_GDT;

#define VMXASSIST_TSS   0x40000
        uint64_t vmxassist_tss = VMXASSIST_TSS;
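        /* Splice the TSS base into the descriptor's scattered base fields:
         * address bits 31:24 go to descriptor bits 63:56, bits 23:16 to
         * bits 39:32, and bits 15:0 to bits 31:16; the low word of the
         * descriptor holds the segment limit. */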
        gdt[0x08 / sizeof(gdt[0])] |=
            ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
            ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
            ((vmxassist_tss & 0x0000FFFF) << (16)) |
            (8392 - 1);

        info->segments.tr.selector = 0x08;
        info->segments.tr.base = vmxassist_tss;

        //info->segments.tr.type = 0x9; 
        info->segments.tr.type = 0x3;
        info->segments.tr.system = 0;
        info->segments.tr.present = 1;
        info->segments.tr.granularity = 0;
    }

    // setup VMXASSIST
    { 
#define VMXASSIST_START 0x000d0000
        extern uint8_t v3_vmxassist_start[];
        extern uint8_t v3_vmxassist_end[];
        addr_t vmxassist_dst = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
            PrintError("Could not find VMXASSIST destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
    }

    /*** Write all the info to the VMCS ***/

#define DEBUGCTL_MSR 0x1d9
    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);

    info->dbg_regs.dr7 = 0x400;

    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);

    if (v3_update_vmcs_ctrl_fields(info)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(info)) {
        PrintError("Could not write host state\n");
        return -1;
    }


    if (v3_update_vmcs_guest_state(info) != VMX_SUCCESS) {
        PrintError("Writing guest state failed!\n");
        return -1;
    }

    v3_print_vmcs();

    vmx_info->state = VMXASSIST_DISABLED;

    v3_post_config_guest(info, config_ptr);

    return 0;
}


static int start_vmx_guest(struct guest_info* info) {
    uint32_t error = 0;
    int ret = 0;

    PrintDebug("Attempting VMLAUNCH\n");

    info->run_state = VM_RUNNING;

    rdtscll(info->time_state.cached_host_tsc);

    ret = v3_vmx_vmlaunch(&(info->vm_regs), info, &(info->ctrl_regs));

    if (ret != VMX_SUCCESS) {
        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMLAUNCH failed: %d\n", error);

        v3_print_vmcs();
    }

    PrintDebug("Returned from VMLAUNCH ret=%d\n", ret);

    return -1;
}


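/* VMX support is advertised in CPUID.1:ECX bit 5. Even when the CPU
 * supports it, VMXON is only legal if IA32_FEATURE_CONTROL has been
 * locked with VMX enabled; a lock value without the VMX-enable bits
 * means the BIOS has disabled VMX. */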
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: %p\n", (void*)ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}

static int has_vmx_nested_paging() {
    return 0;
}



void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
    extern v3_cpu_arch_t v3_cpu_type;
    struct v3_msr tmp_msr;
    uint64_t ret = 0;

    v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));

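    /* Compute CR4 with VMXE (bit 13) set, then confirm that every bit
     * required to be 1 by IA32_VMX_CR4_FIXED0 is set before writing the
     * value back to CR4. */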
    __asm__ __volatile__ (
                          "movq %%cr4, %%rbx;"
                          "orq  $0x00002000, %%rbx;"
                          "movq %%rbx, %0;"
                          : "=m"(ret) 
                          :
                          : "%rbx"
                          );

    if ((~ret & tmp_msr.value) == 0) {
        __asm__ __volatile__ (
                              "movq %0, %%cr4;"
                              :
                              : "q"(ret)
                              );
    } else {
        PrintError("Invalid CR4 Settings!\n");
        return;
    }

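    /* VMX operation also requires CR0.NE = 1, so set bit 5 of CR0. */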
    __asm__ __volatile__ (
                          "movq %%cr0, %%rbx; "
                          "orq  $0x00000020,%%rbx; "
                          "movq %%rbx, %%cr0;"
                          :
                          :
                          : "%rbx"
                          );
    //
    // Should check and return Error here.... 


    // Setup VMXON Region
    vmxon_ptr_phys = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)vmxon_ptr_phys);

    if (v3_enable_vmx(vmxon_ptr_phys) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }


    if (has_vmx_nested_paging() == 1) {
        v3_cpu_type = V3_VMX_EPT_CPU;
    } else {
        v3_cpu_type = V3_VMX_CPU;
    }

    // Setup the VMX specific vmm operations
    vm_ops->init_guest = &init_vmx_guest;
    vm_ops->start_guest = &start_vmx_guest;
    vm_ops->has_nested_paging = &has_vmx_nested_paging;

}