2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
11 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
13 * All rights reserved.
15 * Author: Peter Dinda <pdinda@northwestern.edu>
16 * Jack Lange <jarusl@cs.northwestern.edu>
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmx.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vmcs.h>
26 #include <palacios/vmx_lowlevel.h>
27 #include <palacios/vmm_lowlevel.h>
28 #include <palacios/vmm_ctrl_regs.h>
29 #include <palacios/vmm_config.h>
30 #include <palacios/vm_guest_mem.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmx_io.h>
33 #include <palacios/vmx_msr.h>
/*
 * Per-CPU table with CONFIG_MAX_CPUS entries, zero-initialized.
 * v3_init_vmx_cpu() stores the address returned by allocate_vmcs() for each
 * CPU's VMXON region here, indexed by cpu_id.
 */
35 static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = {0};
/* Declared here and defined outside this file; not called from the code below. */
38 extern int v3_vmx_exit_handler();
/*
 * Guest launch entry point, defined outside this file. start_vmx_guest() calls
 * it and compares the result against VMX_SUCCESS.
 */
39 extern int v3_vmx_vmlaunch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
/*
 * check_vmcs_write: writes `val` into VMCS field `field` through vmcs_write()
 * and, when the result is not VMX_SUCCESS, logs the field name (via
 * v3_vmcs_field_to_str) together with the raw return code using PrintError.
 *
 * NOTE(review): callers OR this function's result into an accumulator
 * (vmx_ret |= check_vmcs_write(...)), so it presumably returns non-zero on
 * failure and 0 on success. The return statements are not shown in the
 * visible lines -- confirm.
 */
41 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
44 ret = vmcs_write(field,val);
// Anything other than VMX_SUCCESS is reported as a VMWRITE error.
46 if (ret != VMX_SUCCESS) {
47 PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
/*
 * sanitize_bits1: takes a 32-bit control value `val` and the number of the MSR
 * that describes its reserved bits, reads that MSR into mask_msr (hi and lo
 * halves) and prints both halves for debugging.
 *
 * NOTE(review): the statements that combine mask_msr with `val` and the return
 * statement are not shown in the visible lines -- confirm how the must-be-one
 * and must-be-zero halves described below are applied.
 */
55 // For the 32 bit reserved bit fields
56 // MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
57 static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
60 PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
// v3_get_msr takes the destination for the high half first, then the low half.
62 v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
64 PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);
/*
 * sanitize_bits2: variant of the reserved-bit helper that works on an addr_t
 * value and takes its constraints from two separate MSRs (msr_num0 and
 * msr_num1). The visible code reads both MSRs, copies their full values into
 * addr_t locals, and prints them.
 *
 * NOTE(review): the statements that apply msr0_val / msr1_val to `val` and the
 * return statement are not shown in the visible lines -- confirm.
 */
74 static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
76 addr_t msr0_val, msr1_val;
78 PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
80 v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
81 v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
// Storing the MSR values in addr_t locals gives them addr_t's width.
83 // This generates a mask that is the natural bit width of the CPU
84 msr0_val = msr0.value;
85 msr1_val = msr1.value;
87 PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);
/*
 * allocate_vmcs: allocates one page with V3_AllocPages(1), zeroes 4096 bytes of
 * it through its virtual mapping, stamps the `revision` field with the revision
 * read from VMX_BASIC_MSR, and returns the page's physical address
 * (V3_PAddr) as an addr_t.
 *
 * Used in this file both for a guest's VMCS (init_vmx_guest) and for a CPU's
 * VMXON region (v3_init_vmx_cpu).
 *
 * NOTE(review): no check of the V3_AllocPages result is visible before the
 * memset -- confirm how allocation failure is handled.
 */
100 static addr_t allocate_vmcs() {
102 struct vmcs_data * vmcs_page = NULL;
104 PrintDebug("Allocating page\n");
// V3_AllocPages result is converted with V3_VAddr so the page can be written to.
106 vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
107 memset(vmcs_page, 0, 4096);
109 v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
// The MSR contents are reinterpreted as struct vmx_basic_msr to pull out the revision.
111 vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
112 PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
// Callers receive the physical address, not the virtual one used above.
114 return (addr_t)V3_PAddr((void *)vmcs_page);
/*
 * init_vmx_guest: builds the VMX-specific state for one guest and programs its
 * VMCS. Installed as vm_ops->init_guest by v3_init_vmx_hooks().
 *
 * Parameters:
 *   info       - guest being initialized; info->vmm_data is set to the newly
 *                allocated struct vmx_data.
 *   config_ptr - guest configuration, passed through to v3_pre_config_guest()
 *                and v3_post_config_guest().
 *
 * Visible steps, in order:
 *   1. v3_pre_config_guest(), allocate struct vmx_data and a VMCS page.
 *   2. vmcs_clear() then vmcs_load() on the new VMCS.
 *   3. Cache host GDTR/IDTR/TR values in vmx_info->host_state.
 *   4. Initialize the I/O and MSR maps and seed the pin-based, primary
 *      processor-based, exit and entry controls from their capability MSRs.
 *   5. Set initial guest registers, control registers and segments for
 *      starting in vmxassist.
 *   6. Copy a small GDT and the vmxassist image into guest memory.
 *   7. Write control fields, host state and guest state to the VMCS, then
 *      v3_post_config_guest().
 *
 * NOTE(review): the return statements are not shown in the visible lines;
 * presumably failures return an error code after the PrintError calls --
 * confirm. The values OR-ed into vmx_ret by check_vmcs_write() are also not
 * visibly tested anywhere -- confirm.
 */
118 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
119 struct vmx_data * vmx_info = NULL;
122 v3_pre_config_guest(info, config_ptr);
// NOTE(review): no NULL check on the V3_Malloc result is visible before
// vmx_info is dereferenced below -- confirm.
124 vmx_info = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
126 PrintDebug("vmx_data pointer: %p\n", (void *)vmx_info);
128 PrintDebug("Allocating VMCS\n");
// allocate_vmcs() returns a physical address, hence the field name.
129 vmx_info->vmcs_ptr_phys = allocate_vmcs();
131 PrintDebug("VMCS pointer: %p\n", (void *)(vmx_info->vmcs_ptr_phys));
133 info->vmm_data = vmx_info;
135 PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
137 // TODO: Fix vmcs fields so they're 32-bit
// The VMCS is cleared first and then loaded; each step is checked separately.
139 PrintDebug("Clearing VMCS: %p\n", (void *)vmx_info->vmcs_ptr_phys);
140 vmx_ret = vmcs_clear(vmx_info->vmcs_ptr_phys);
142 if (vmx_ret != VMX_SUCCESS) {
143 PrintError("VMCLEAR failed\n");
147 PrintDebug("Loading VMCS\n");
148 vmx_ret = vmcs_load(vmx_info->vmcs_ptr_phys);
150 if (vmx_ret != VMX_SUCCESS) {
151 PrintError("VMPTRLD failed\n");
157 /******* Setup Host State **********/
159 /* Cache GDTR, IDTR, and TR in host struct */
// tmp_seg is a packed scratch struct reused for each of the three reads below;
// the code reads its .base and .selector members.
164 } __attribute__((packed)) tmp_seg;
// First asm block: its result is used as the host GDT base.
167 __asm__ __volatile__(
173 gdtr_base = tmp_seg.base;
174 vmx_info->host_state.gdtr.base = gdtr_base;
// Second asm block: its result is used as the host IDT base.
176 __asm__ __volatile__(
182 vmx_info->host_state.idtr.base = tmp_seg.base;
// Third asm block: its result is used as the host TR selector.
184 __asm__ __volatile__(
190 vmx_info->host_state.tr.selector = tmp_seg.selector;
192 /* The GDTR *index* is bits 3-15 of the selector. */
193 struct tss_descriptor * desc = NULL;
// Locate the TSS descriptor: index (selector >> 3) times 8 bytes per GDT entry,
// offset from the cached GDT base.
194 desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
// Reassemble the TSS base address from the descriptor's split base fields.
196 tmp_seg.base = ((desc->base1) |
197 (desc->base2 << 16) |
198 (desc->base3 << 24) |
200 ((uint64_t)desc->base4 << 32)
206 vmx_info->host_state.tr.base = tmp_seg.base;
210 /********** Setup and VMX Control Fields from MSR ***********/
212 v3_init_vmx_io_map(info);
213 v3_init_vmx_msr_map(info);
215 struct v3_msr tmp_msr;
// Each control word is seeded from the low 32 bits of its capability MSR and
// then has individual exits switched on.
217 v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
219 /* Add external interrupts, NMI exiting, and virtual NMI */
220 vmx_info->pin_ctrls.value = tmp_msr.lo;
221 vmx_info->pin_ctrls.nmi_exit = 1;
222 vmx_info->pin_ctrls.ext_int_exit = 1;
224 v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
226 vmx_info->pri_proc_ctrls.value = tmp_msr.lo;
227 vmx_info->pri_proc_ctrls.use_io_bitmap = 1;
228 vmx_info->pri_proc_ctrls.hlt_exit = 1;
229 vmx_info->pri_proc_ctrls.invlpg_exit = 1;
230 vmx_info->pri_proc_ctrls.use_msr_bitmap = 1;
231 vmx_info->pri_proc_ctrls.pause_exit = 1;
// I/O bitmap B is taken to start one 4KB page after bitmap A inside
// io_map.arch_data; both are written as physical addresses.
233 vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
234 vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
235 (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB);
237 vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));
239 v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
240 vmx_info->exit_ctrls.value = tmp_msr.lo;
241 vmx_info->exit_ctrls.host_64_on = 1;
// ia32e_avail is set when the MSR-seeded exit controls already have either
// EFER bit (save_efer or ld_efer) set.
243 if ((vmx_info->exit_ctrls.save_efer == 1) || (vmx_info->exit_ctrls.ld_efer == 1)) {
244 vmx_info->ia32e_avail = 1;
247 v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
248 vmx_info->entry_ctrls.value = tmp_msr.lo;
// NOTE(review): the lines that fill in excp_bmap are not shown in the visible
// lines -- confirm which exceptions are intercepted.
251 struct vmx_exception_bitmap excp_bmap;
256 vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
258 /******* Setup VMXAssist guest state ***********/
261 info->vm_regs.rsp = 0x80000;
// `flags` aliases the guest rflags register as a bitfield struct.
263 struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
266 /* Print Control MSRs */
267 v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
268 PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
270 v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
271 PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
// Initial guest CR0/CR4 values. 0x80000031 includes both CR0_PE (0x1) and
// CR0_PG (0x80000000) as defined further down.
274 #define GUEST_CR0 0x80000031
275 #define GUEST_CR4 0x00002000
276 info->ctrl_regs.cr0 = GUEST_CR0;
277 info->ctrl_regs.cr4 = GUEST_CR4;
// The shadow copy of the guest's CR0 gets its pe bit set.
279 ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
282 if (info->shdw_pg_mode == SHADOW_PAGING) {
283 PrintDebug("Creating initial shadow page table\n");
285 if (v3_init_passthrough_pts(info) == -1) {
286 PrintError("Could not initialize passthrough page tables\n");
290 #define CR0_PE 0x00000001
291 #define CR0_PG 0x80000000
// The CR0 mask covers PE and PG; the CR4 mask covers CR4_VMXE.
294 vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
295 vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
// The guest's CR3 is pointed at the direct-map (passthrough) page table.
297 info->ctrl_regs.cr3 = info->direct_map_pt;
299 // vmx_info->pinbased_ctrls |= NMI_EXIT;
// Exit on both CR3 loads and CR3 stores.
302 vmx_info->pri_proc_ctrls.cr3_ld_exit = 1;
303 vmx_info->pri_proc_ctrls.cr3_str_exit = 1;
306 // Setup segment registers
// info->segments is walked as an array of struct v3_segment.
308 struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
// All ten entries start with selector 0x18 (3 << 3, the "DS 32-bit" slot of
// the GDT below), limit 0xffff and base 0.
312 for (i = 0; i < 10; i++) {
313 seg_reg[i].selector = 3 << 3;
314 seg_reg[i].limit = 0xffff;
315 seg_reg[i].base = 0x0;
// CS is switched to selector 0x10 (2 << 3, the "CS 32-bit" slot of the GDT below).
318 info->segments.cs.selector = 2<<3;
320 /* Set only the segment registers */
// The first six entries get limit 0xfffff with granularity, system and
// present all set to 1.
321 for (i = 0; i < 6; i++) {
322 seg_reg[i].limit = 0xfffff;
323 seg_reg[i].granularity = 1;
325 seg_reg[i].system = 1;
327 seg_reg[i].present = 1;
331 info->segments.cs.type = 0xb;
// LDTR uses selector 0x20, matching the "LDTR 32-bit" slot of the GDT below.
333 info->segments.ldtr.selector = 0x20;
334 info->segments.ldtr.type = 2;
335 info->segments.ldtr.system = 0;
336 info->segments.ldtr.present = 1;
337 info->segments.ldtr.granularity = 0;
340 /************* Map in GDT and vmxassist *************/
// Five-entry template GDT; the comment on each entry gives its selector offset.
342 uint64_t gdt[] __attribute__ ((aligned(32))) = {
343 0x0000000000000000ULL, /* 0x00: reserved */
344 0x0000830000000000ULL, /* 0x08: 32-bit TSS */
345 //0x0000890000000000ULL, /* 0x08: 32-bit TSS */
346 0x00CF9b000000FFFFULL, /* 0x10: CS 32-bit */
347 0x00CF93000000FFFFULL, /* 0x18: DS 32-bit */
348 0x000082000000FFFFULL, /* 0x20: LDTR 32-bit */
// Guest-physical address where the GDT is placed.
351 #define VMXASSIST_GDT 0x10000
352 addr_t vmxassist_gdt = 0;
// Translate the guest-physical destination to a host virtual address for memcpy.
354 if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
355 PrintError("Could not find VMXASSIST GDT destination\n");
// Copies all five template entries into guest memory.
// NOTE(review): this copy happens before the TSS base is OR-ed into gdt[1]
// below, and no later copy is visible, so the guest's in-memory TSS descriptor
// appears to keep a zero base -- confirm whether that is intended.
359 memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
361 info->segments.gdtr.base = VMXASSIST_GDT;
// Guest-physical address used as the TSS base.
363 #define VMXASSIST_TSS 0x40000
364 uint64_t vmxassist_tss = VMXASSIST_TSS;
// Patch entry 1 (offset 0x08, the TSS slot) of the local gdt array: base bits
// 24-31 go to descriptor bits 56-63, bits 16-23 to bits 32-39, and bits 0-15
// to bits 16-31. The final OR term is not shown in the visible lines.
365 gdt[0x08 / sizeof(gdt[0])] |=
366 ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
367 ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
368 ((vmxassist_tss & 0x0000FFFF) << (16)) |
// The guest TR is set directly: selector 0x08 with the TSS base above.
371 info->segments.tr.selector = 0x08;
372 info->segments.tr.base = vmxassist_tss;
374 //info->segments.tr.type = 0x9;
375 info->segments.tr.type = 0x3;
376 info->segments.tr.system = 0;
377 info->segments.tr.present = 1;
378 info->segments.tr.granularity = 0;
// Guest-physical load address of the vmxassist image; the image itself is the
// byte range between the two externally defined symbols.
383 #define VMXASSIST_START 0x000d0000
384 extern uint8_t v3_vmxassist_start[];
385 extern uint8_t v3_vmxassist_end[];
386 addr_t vmxassist_dst = 0;
388 if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
389 PrintError("Could not find VMXASSIST destination\n");
// Image length is the distance between the end and start symbols.
393 memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
396 /*** Write all the info to the VMCS ***/
// The host's current DEBUGCTL MSR value is used as the guest's initial value.
398 #define DEBUGCTL_MSR 0x1d9
399 v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
400 vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
402 info->dbg_regs.dr7 = 0x400;
// The link pointer field is written as all ones.
404 vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
// Flush the control, host-state and guest-state structures built above into
// the VMCS. The first two helpers are treated as failing on any non-zero
// result, the third on anything other than VMX_SUCCESS.
406 if (v3_update_vmcs_ctrl_fields(info)) {
407 PrintError("Could not write control fields!\n");
411 if (v3_update_vmcs_host_state(info)) {
412 PrintError("Could not write host state\n");
417 if (v3_update_vmcs_guest_state(info) != VMX_SUCCESS) {
418 PrintError("Writing guest state failed!\n");
424 vmx_info->state = VMXASSIST_DISABLED;
426 v3_post_config_guest(info, config_ptr);
/*
 * start_vmx_guest: marks the guest as running, records the current host TSC,
 * and launches the guest through v3_vmx_vmlaunch(). Installed as
 * vm_ops->start_guest by v3_init_vmx_hooks().
 *
 * If the launch does not return VMX_SUCCESS, the VMCS_INSTR_ERR field is read
 * and printed with PrintError.
 *
 * NOTE(review): the declarations of `ret` and `error` and the return
 * statements are not shown in the visible lines -- confirm the return
 * convention and that `error` matches the %d format used below.
 */
432 static int start_vmx_guest(struct guest_info* info) {
436 PrintDebug("Attempting VMLAUNCH\n");
438 info->run_state = VM_RUNNING;
// Snapshot the host timestamp counter just before entering the guest.
440 rdtscll(info->time_state.cached_host_tsc);
442 ret = v3_vmx_vmlaunch(&(info->vm_regs), info, &(info->ctrl_regs));
444 if (ret != VMX_SUCCESS) {
// Fetch the instruction error recorded in the VMCS for the failure message.
445 vmcs_read(VMCS_INSTR_ERR, &error);
446 PrintError("VMLAUNCH failed: %d\n", error);
451 PrintDebug("Returned from VMLAUNCH ret=%d\n", ret);
/*
 * v3_is_vmx_capable: probes whether VMX can be used on the current CPU.
 *
 * Runs CPUID leaf 0x1 and tests ECX against CPUID_1_ECX_VTXFLAG. When the flag
 * is set, it reads VMX_FEATURE_CONTROL_MSR and requires every bit of
 * FEATURE_CONTROL_VALID to be set in the MSR's low 32 bits; otherwise it
 * prints "VMX is locked -- enable in the BIOS". When the CPUID flag is clear
 * it prints "VMX not supported on this cpu".
 *
 * NOTE(review): the return statements are not shown in the visible lines;
 * presumably non-zero means capable and 0 means not -- confirm.
 */
457 int v3_is_vmx_capable() {
458 v3_msr_t feature_msr;
459 uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
461 v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
463 PrintDebug("ECX: 0x%x\n", ecx);
465 if (ecx & CPUID_1_ECX_VTXFLAG) {
466 v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
468 PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
// All bits in FEATURE_CONTROL_VALID must be present, not just any one of them.
470 if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
471 PrintDebug("VMX is locked -- enable in the BIOS\n");
476 PrintDebug("VMX not supported on this cpu\n");
/*
 * has_vmx_nested_paging: reports whether nested paging is available.
 * v3_init_vmx_cpu() treats a return value of 1 as "EPT capable"
 * (V3_VMX_EPT_CPU) and anything else as plain V3_VMX_CPU. Also installed as
 * vm_ops->has_nested_paging by v3_init_vmx_hooks().
 *
 * NOTE(review): the body is not shown in the visible lines -- confirm what it
 * actually returns.
 */
483 static int has_vmx_nested_paging() {
/*
 * v3_init_vmx_cpu: per-CPU VMX bring-up.
 *
 * Parameters:
 *   cpu_id - index of the CPU being initialized; used to index both
 *            host_vmcs_ptrs[] and v3_cpu_types[].
 *
 * Visible steps, in order:
 *   1. Read VMX_CR4_FIXED0_MSR and OR 0x2000 into a CR4-derived value in
 *      inline asm (bit 13; presumably CR4.VMXE -- confirm).
 *   2. Check that value against the MSR; on mismatch print
 *      "Invalid CR4 Settings!".
 *   3. OR 0x20 (bit 5) into CR0 in inline asm.
 *   4. Allocate this CPU's VMXON region with allocate_vmcs() and pass it to
 *      v3_enable_vmx().
 *   5. Record the CPU type as V3_VMX_EPT_CPU or V3_VMX_CPU depending on
 *      has_vmx_nested_paging().
 *
 * NOTE(review): the function returns void, so the failures it prints are not
 * reported to the caller. No range check of cpu_id against CONFIG_MAX_CPUS is
 * shown in the visible lines -- confirm that callers guarantee it. Parts of
 * the inline asm bodies are also not shown.
 */
489 void v3_init_vmx_cpu(int cpu_id) {
490 extern v3_cpu_arch_t v3_cpu_types[];
491 struct v3_msr tmp_msr;
494 v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
496 __asm__ __volatile__ (
498 "orq $0x00002000, %%rbx;"
// True when every bit set in the fixed-0 MSR value is also set in ret.
505 if ((~ret & tmp_msr.value) == 0) {
506 __asm__ __volatile__ (
512 PrintError("Invalid CR4 Settings!\n");
516 __asm__ __volatile__ (
517 "movq %%cr0, %%rbx; "
518 "orq $0x00000020,%%rbx; "
525 // Should check and return Error here....
528 // Setup VMXON Region
// The stored value is the physical address returned by allocate_vmcs().
529 host_vmcs_ptrs[cpu_id] = allocate_vmcs();
531 PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
533 if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
534 PrintDebug("VMX Enabled\n");
536 PrintError("VMX initialization failure\n");
// Only an exact return of 1 selects the EPT CPU type.
541 if (has_vmx_nested_paging() == 1) {
542 v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
544 v3_cpu_types[cpu_id] = V3_VMX_CPU;
/*
 * v3_init_vmx_hooks: fills in the VMX implementations of the architecture
 * operations table.
 *
 * Parameters:
 *   vm_ops - table to populate; its init_guest, start_guest and
 *            has_nested_paging members are pointed at this file's static
 *            init_vmx_guest, start_vmx_guest and has_vmx_nested_paging.
 */
550 void v3_init_vmx_hooks(struct v3_ctrl_ops * vm_ops) {
552 // Setup the VMX specific vmm operations
553 vm_ops->init_guest = &init_vmx_guest;
554 vm_ops->start_guest = &start_vmx_guest;
555 vm_ops->has_nested_paging = &has_vmx_nested_paging;