2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
11 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
13 * All rights reserved.
15 * Author: Peter Dinda <pdinda@northwestern.edu>
16 * Jack Lange <jarusl@cs.northwestern.edu>
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmx.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vmcs.h>
26 #include <palacios/vmx_lowlevel.h>
27 #include <palacios/vmm_lowlevel.h>
28 #include <palacios/vmm_ctrl_regs.h>
29 #include <palacios/vmm_config.h>
30 #include <palacios/vm_guest_mem.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmx_io.h>
33 #include <palacios/vmx_msr.h>
/* Physical address of the VMXON region; assigned from allocate_vmcs() in v3_init_vmx(). */
35 static addr_t vmxon_ptr_phys;
/* Declared here, defined elsewhere.
 * NOTE(review): presumably the assembly VM-exit entry point -- confirm in the low-level VMX code. */
36 extern int v3_vmx_exit_handler();
/* Launch routine called by start_vmx_guest(); it is handed the guest GPRs,
 * the guest_info itself and the guest control registers. */
37 extern int v3_vmx_vmlaunch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
/*
 * Write `val` into VMCS field `field` through vmcs_write() and report a
 * failure with PrintError.
 *
 * field - VMCS field encoding to write.
 * val   - value to store in that field.
 *
 * Callers in this file OR the result into an accumulator (vmx_ret |= ...).
 * NOTE(review): presumably a nonzero result signals failure -- confirm the
 * exact return values.
 */
39 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
42 ret = vmcs_write(field,val);
/* Anything other than VMX_SUCCESS is logged with the field's symbolic name and the raw code. */
44 if (ret != VMX_SUCCESS) {
45 PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
53 // For the 32 bit reserved bit fields
54 // MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
/*
 * msr_num - MSR whose two 32-bit halves are read into mask_msr.
 * val     - 32-bit value to be sanitized against that MSR.
 *
 * NOTE(review): MB1 / MBZ presumably stand for must-be-one / must-be-zero
 * bits, and the result is presumably `val` with those constraints applied --
 * confirm against the masking statements and the VMX capability MSR layout.
 */
55 static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
58 PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
/* Fetch both 32-bit halves of the MSR; they are logged next in hi : lo order. */
60 v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
62 PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);
/*
 * Sanitizing helper that takes its constraints from two MSRs instead of one.
 *
 * msr_num0, msr_num1 - MSRs read into msr0 / msr1.
 * val                - addr_t-wide value to be sanitized.
 *
 * NOTE(review): presumably msr_num0 / msr_num1 are a FIXED0 / FIXED1 style
 * pair (bits that must be 1 / bits that may be 1) -- confirm against the
 * masking statements and the callers.
 */
72 static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
74 addr_t msr0_val, msr1_val;
76 PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
78 v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
79 v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
81 // This generates a mask that is the natural bit width of the CPU
/* The MSR values are stored in addr_t locals, so they take on the width of
 * addr_t (presumably pointer width -- confirm the addr_t typedef). */
82 msr0_val = msr0.value;
83 msr1_val = msr1.value;
85 PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);
/*
 * Allocate one page for a VMCS-format region, zero it, stamp it with the VMX
 * revision read from VMX_BASIC_MSR, and return the page's PHYSICAL address.
 *
 * Used both for the per-guest VMCS (init_vmx_guest) and for the VMXON region
 * (v3_init_vmx).
 */
98 static addr_t allocate_vmcs() {
100 struct vmcs_data * vmcs_page = NULL;
102 PrintDebug("Allocating page\n");
/* V3_AllocPages() result is converted with V3_VAddr() so the page can be written here.
 * NOTE(review): confirm the allocation result is checked before the memset below. */
104 vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
/* 4096 = size of the single page allocated above. */
105 memset(vmcs_page, 0, 4096);
107 v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
/* The raw MSR contents are reinterpreted as struct vmx_basic_msr to pick out the revision field. */
109 vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
110 PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
/* Callers get the physical address, not the virtual pointer used above. */
112 return (addr_t)V3_PAddr((void *)vmcs_page);
/*
 * Build the VMX state for one guest.
 *
 * In order: allocate the per-guest vmx_data and its VMCS, clear and load that
 * VMCS, cache the host GDTR/IDTR/TR, seed the pin / processor / exit / entry
 * controls from the VMX capability MSRs, set the initial guest registers and
 * segments, copy a GDT and the vmxassist image into guest memory, and finally
 * write control, host and guest state through the v3_update_vmcs_* helpers.
 *
 * info       - guest being configured; info->vmm_data is pointed at the new vmx_data.
 * config_ptr - configuration passed to v3_pre_config_guest() / v3_post_config_guest().
 *
 * Installed as vm_ops->init_guest by v3_init_vmx().
 */
116 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
117 struct vmx_data * vmx_info = NULL;
120 v3_pre_config_guest(info, config_ptr);
/* NOTE(review): confirm the V3_Malloc() result is checked before vmx_info is dereferenced below. */
122 vmx_info = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
124 PrintDebug("vmx_data pointer: %p\n", (void *)vmx_info);
126 PrintDebug("Allocating VMCS\n");
/* allocate_vmcs() returns a physical address; that is what gets stored and later cleared/loaded. */
127 vmx_info->vmcs_ptr_phys = allocate_vmcs();
129 PrintDebug("VMCS pointer: %p\n", (void *)(vmx_info->vmcs_ptr_phys));
131 info->vmm_data = vmx_info;
133 PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
135 // TODO: Fix vmcs fields so they're 32-bit
/* The VMCS is cleared, then loaded, before any check_vmcs_write() call below. */
137 PrintDebug("Clearing VMCS: %p\n", (void *)vmx_info->vmcs_ptr_phys);
138 vmx_ret = vmcs_clear(vmx_info->vmcs_ptr_phys);
140 if (vmx_ret != VMX_SUCCESS) {
141 PrintError("VMCLEAR failed\n");
145 PrintDebug("Loading VMCS\n");
146 vmx_ret = vmcs_load(vmx_info->vmcs_ptr_phys);
148 if (vmx_ret != VMX_SUCCESS) {
149 PrintError("VMPTRLD failed\n");
155 /******* Setup Host State **********/
157 /* Cache GDTR, IDTR, and TR in host struct */
/* tmp_seg is a packed scratch structure reused for each of the three reads below. */
162 } __attribute__((packed)) tmp_seg;
165 __asm__ __volatile__(
/* gdtr_base is kept in a local because the TSS descriptor lookup further down needs it. */
171 gdtr_base = tmp_seg.base;
172 vmx_info->host_state.gdtr.base = gdtr_base;
174 __asm__ __volatile__(
180 vmx_info->host_state.idtr.base = tmp_seg.base;
182 __asm__ __volatile__(
188 vmx_info->host_state.tr.selector = tmp_seg.selector;
190 /* The GDTR *index* is bits 3-15 of the selector. */
191 struct tss_descriptor * desc = NULL;
/* Descriptor address = GDT base + 8 bytes per entry * selector index. */
192 desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
/* The TSS base is stored split across several descriptor fields; reassemble it here. */
194 tmp_seg.base = ((desc->base1) |
195 (desc->base2 << 16) |
196 (desc->base3 << 24) |
198 ((uint64_t)desc->base4 << 32)
204 vmx_info->host_state.tr.base = tmp_seg.base;
208 /********** Setup and VMX Control Fields from MSR ***********/
210 v3_init_vmx_io_map(info);
211 v3_init_vmx_msr_map(info);
213 struct v3_msr tmp_msr;
215 v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
217 /* Add external interrupts, NMI exiting, and virtual NMI */
/* Each control word starts from the low 32 bits of its capability MSR; individual bits are then forced on. */
218 vmx_info->pin_ctrls.value = tmp_msr.lo;
219 vmx_info->pin_ctrls.nmi_exit = 1;
220 vmx_info->pin_ctrls.ext_int_exit = 1;
222 v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
224 vmx_info->pri_proc_ctrls.value = tmp_msr.lo;
225 vmx_info->pri_proc_ctrls.use_io_bitmap = 1;
226 vmx_info->pri_proc_ctrls.hlt_exit = 1;
227 vmx_info->pri_proc_ctrls.invlpg_exit = 1;
228 vmx_info->pri_proc_ctrls.use_msr_bitmap = 1;
229 vmx_info->pri_proc_ctrls.pause_exit = 1;
/* I/O bitmap B is taken to be the 4KB page directly after bitmap A in io_map.arch_data.
 * NOTE(review): assumes v3_init_vmx_io_map() allocates two contiguous pages -- confirm. */
231 vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
232 vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
233 (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB);
235 vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));
237 v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
238 vmx_info->exit_ctrls.value = tmp_msr.lo;
239 vmx_info->exit_ctrls.host_64_on = 1;
/* ia32e_avail is derived from the EFER save/load bits as they came out of the MSR's low word. */
241 if ((vmx_info->exit_ctrls.save_efer == 1) || (vmx_info->exit_ctrls.ld_efer == 1)) {
242 vmx_info->ia32e_avail = 1;
245 v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
246 vmx_info->entry_ctrls.value = tmp_msr.lo;
249 struct vmx_exception_bitmap excp_bmap;
254 vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
256 /******* Setup VMXAssist guest state ***********/
259 info->vm_regs.rsp = 0x80000;
261 struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
264 /* Print Control MSRs */
265 v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
266 PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
268 v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
269 PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
/* Initial guest CR0 / CR4 values.
 * NOTE(review): 0x80000031 presumably = PG | NE | ET | PE and 0x2000 = CR4.VMXE -- confirm against the CR0/CR4 bit layout. */
272 #define GUEST_CR0 0x80000031
273 #define GUEST_CR4 0x00002000
274 info->ctrl_regs.cr0 = GUEST_CR0;
275 info->ctrl_regs.cr4 = GUEST_CR4;
/* The guest-visible (shadow) CR0 is marked as having PE set. */
277 ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
280 if (info->shdw_pg_mode == SHADOW_PAGING) {
281 PrintDebug("Creating initial shadow page table\n");
283 if (v3_init_passthrough_pts(info) == -1) {
284 PrintError("Could not initialize passthrough page tables\n");
288 #define CR0_PE 0x00000001
289 #define CR0_PG 0x80000000
/* CR0 mask covers PE and PG, CR4 mask covers VMXE.
 * NOTE(review): presumably these are the bits whose guest writes the VMM intercepts -- confirm. */
292 vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
293 vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
/* The guest starts out on the direct-map page tables. */
295 info->ctrl_regs.cr3 = info->direct_map_pt;
297 // vmx_info->pinbased_ctrls |= NMI_EXIT;
/* Exit on both CR3 loads and CR3 stores. */
300 vmx_info->pri_proc_ctrls.cr3_ld_exit = 1;
301 vmx_info->pri_proc_ctrls.cr3_str_exit = 1;
304 // Setup segment registers
/* info->segments is walked as a flat array of struct v3_segment.
 * NOTE(review): assumes it holds at least 10 contiguous v3_segment entries -- confirm against its declaration. */
306 struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
/* Default every entry to selector 3<<3 (0x18, the DS slot of the GDT below), limit 0xffff, base 0. */
310 for (i = 0; i < 10; i++) {
311 seg_reg[i].selector = 3 << 3;
312 seg_reg[i].limit = 0xffff;
313 seg_reg[i].base = 0x0;
/* CS uses selector 2<<3 (0x10), the CS slot of the GDT below. */
316 info->segments.cs.selector = 2<<3;
318 /* Set only the segment registers */
/* Only the first six entries get the wider limit and the granularity/system/present bits. */
319 for (i = 0; i < 6; i++) {
320 seg_reg[i].limit = 0xfffff;
321 seg_reg[i].granularity = 1;
323 seg_reg[i].system = 1;
325 seg_reg[i].present = 1;
329 info->segments.cs.type = 0xb;
/* LDTR selector 0x20 matches the LDTR slot of the GDT below. */
331 info->segments.ldtr.selector = 0x20;
332 info->segments.ldtr.type = 2;
333 info->segments.ldtr.system = 0;
334 info->segments.ldtr.present = 1;
335 info->segments.ldtr.granularity = 0;
338 /************* Map in GDT and vmxassist *************/
340 uint64_t gdt[] __attribute__ ((aligned(32))) = {
341 0x0000000000000000ULL, /* 0x00: reserved */
342 0x0000830000000000ULL, /* 0x08: 32-bit TSS */
343 //0x0000890000000000ULL, /* 0x08: 32-bit TSS */
344 0x00CF9b000000FFFFULL, /* 0x10: CS 32-bit */
345 0x00CF93000000FFFFULL, /* 0x18: DS 32-bit */
346 0x000082000000FFFFULL, /* 0x20: LDTR 32-bit */
/* Guest-physical address at which the GDT above is placed. */
349 #define VMXASSIST_GDT 0x10000
350 addr_t vmxassist_gdt = 0;
/* Translate the guest-physical destination to a host virtual address before copying. */
352 if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
353 PrintError("Could not find VMXASSIST GDT destination\n");
/* Copies all five descriptors of gdt[] into guest memory. */
357 memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
359 info->segments.gdtr.base = VMXASSIST_GDT;
361 #define VMXASSIST_TSS 0x40000
362 uint64_t vmxassist_tss = VMXASSIST_TSS;
/* Scatter the TSS base into the base fields of descriptor 0x08 of the LOCAL gdt[] array.
 * NOTE(review): this patch happens after gdt[] was already copied into guest
 * memory above; unless the copy is repeated afterwards, the guest's in-memory
 * TSS descriptor never receives this base/limit -- confirm whether intended. */
363 gdt[0x08 / sizeof(gdt[0])] |=
364 ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
365 ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
366 ((vmxassist_tss & 0x0000FFFF) << (16)) |
/* TR selector 0x08 matches the TSS slot of gdt[]; type 0x3 matches that descriptor's low type nibble (0x83). */
369 info->segments.tr.selector = 0x08;
370 info->segments.tr.base = vmxassist_tss;
372 //info->segments.tr.type = 0x9;
373 info->segments.tr.type = 0x3;
374 info->segments.tr.system = 0;
375 info->segments.tr.present = 1;
376 info->segments.tr.granularity = 0;
/* Guest-physical load address of the vmxassist image; the image is delimited by the two linker-provided symbols below. */
381 #define VMXASSIST_START 0x000d0000
382 extern uint8_t v3_vmxassist_start[];
383 extern uint8_t v3_vmxassist_end[];
384 addr_t vmxassist_dst = 0;
386 if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
387 PrintError("Could not find VMXASSIST destination\n");
/* Copy length is the byte distance between the start and end symbols. */
391 memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
394 /*** Write all the info to the VMCS ***/
/* The guest debug-control field is seeded with the host's current DEBUGCTL MSR value. */
396 #define DEBUGCTL_MSR 0x1d9
397 v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
398 vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
400 info->dbg_regs.dr7 = 0x400;
/* Link pointer is set to all ones.
 * NOTE(review): presumably the "no linked VMCS" value prescribed by the Intel SDM -- confirm. */
402 vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
/* The three helpers below write the control, host and guest state into the VMCS; each failure is logged separately. */
404 if (v3_update_vmcs_ctrl_fields(info)) {
405 PrintError("Could not write control fields!\n");
409 if (v3_update_vmcs_host_state(info)) {
410 PrintError("Could not write host state\n");
415 if (v3_update_vmcs_guest_state(info) != VMX_SUCCESS) {
416 PrintError("Writing guest state failed!\n");
422 vmx_info->state = VMXASSIST_DISABLED;
424 v3_post_config_guest(info, config_ptr);
/*
 * Launch the guest described by `info` via v3_vmx_vmlaunch().
 *
 * info - guest to run; its run_state is set to VM_RUNNING and the host TSC is
 *        cached in info->time_state before the launch.
 *
 * Installed as vm_ops->start_guest by v3_init_vmx().
 */
430 static int start_vmx_guest(struct guest_info* info) {
434 PrintDebug("Attempting VMLAUNCH\n");
436 info->run_state = VM_RUNNING;
/* Snapshot the host TSC immediately before entering the guest. */
438 rdtscll(info->time_state.cached_host_tsc);
440 ret = v3_vmx_vmlaunch(&(info->vm_regs), info, &(info->ctrl_regs));
/* On failure, read the VMCS instruction-error field and log it. */
442 if (ret != VMX_SUCCESS) {
443 vmcs_read(VMCS_INSTR_ERR, &error);
444 PrintError("VMLAUNCH failed: %d\n", error);
449 PrintDebug("Returned from VMLAUNCH ret=%d\n", ret);
/*
 * Probe whether this CPU can run VMX: CPUID leaf 1 must report the VT-x flag
 * in ECX, and the low word of VMX_FEATURE_CONTROL_MSR must have all
 * FEATURE_CONTROL_VALID bits set.
 *
 * NOTE(review): the int result is presumably a boolean capability flag --
 * confirm the exact return values.
 */
455 int v3_is_vmx_capable() {
456 v3_msr_t feature_msr;
457 addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;
459 v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
461 PrintDebug("ECX: %p\n", (void*)ecx);
/* The feature-control MSR is only read when CPUID advertises VT-x. */
463 if (ecx & CPUID_1_ECX_VTXFLAG) {
464 v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
466 PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
/* Every bit of FEATURE_CONTROL_VALID must be set; otherwise VMX is reported as locked by the BIOS. */
468 if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
469 PrintDebug("VMX is locked -- enable in the BIOS\n");
474 PrintDebug("VMX not supported on this cpu\n");
/*
 * Nested-paging query. v3_init_vmx() selects V3_VMX_EPT_CPU when this returns
 * 1 and V3_VMX_CPU otherwise, and installs the function as
 * vm_ops->has_nested_paging.
 */
481 static int has_vmx_nested_paging() {
/*
 * One-time VMX bring-up: adjust CR4 and CR0 through inline asm, allocate the
 * VMXON region, enable VMX with v3_enable_vmx(), record the CPU type in
 * v3_cpu_type, and install the VMX guest callbacks.
 *
 * vm_ops - operations table that receives init_guest, start_guest and
 *          has_nested_paging.
 */
487 void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
488 extern v3_cpu_arch_t v3_cpu_type;
489 struct v3_msr tmp_msr;
/* tmp_msr holds the CR4 FIXED0 MSR and is used in the validity test below. */
492 v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
/* 0x2000 is the same value as GUEST_CR4 in init_vmx_guest().
 * NOTE(review): presumably CR4.VMXE -- confirm. */
494 __asm__ __volatile__ (
496 "orq $0x00002000, %%rbx;"
/* Continue only when every bit set in the FIXED0 MSR is also set in ret.
 * NOTE(review): ret is presumably the OR-ed CR4 value produced by the asm above -- confirm. */
503 if ((~ret & tmp_msr.value) == 0) {
504 __asm__ __volatile__ (
510 PrintError("Invalid CR4 Settings!\n");
/* OR 0x20 into CR0.
 * NOTE(review): presumably CR0.NE -- confirm. */
514 __asm__ __volatile__ (
515 "movq %%cr0, %%rbx; "
516 "orq $0x00000020,%%rbx; "
523 // Should check and return Error here....
526 // Setup VMXON Region
/* allocate_vmcs() is reused here: it yields a zeroed page stamped with the VMX revision and returns its physical address. */
527 vmxon_ptr_phys = allocate_vmcs();
529 PrintDebug("VMXON pointer: 0x%p\n", (void *)vmxon_ptr_phys);
531 if (v3_enable_vmx(vmxon_ptr_phys) == VMX_SUCCESS) {
532 PrintDebug("VMX Enabled\n");
534 PrintError("VMX initialization failure\n");
/* Record whether this CPU is the EPT (nested paging) or the plain VMX flavour. */
539 if (has_vmx_nested_paging() == 1) {
540 v3_cpu_type = V3_VMX_EPT_CPU;
542 v3_cpu_type = V3_VMX_CPU;
545 // Setup the VMX specific vmm operations
546 vm_ops->init_guest = &init_vmx_guest;
547 vm_ops->start_guest = &start_vmx_guest;
548 vm_ops->has_nested_paging = &has_vmx_nested_paging;