/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Peter Dinda <pdinda@northwestern.edu>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmx.h>
#include <palacios/vmcs.h>
#include <palacios/vmm.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_util.h>
#include <palacios/vmm_string.h>
extern int Launch_VM(ullong_t vmcsPtr, uint_t eip);

#define NUMPORTS 65536
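/* Debug/trace I/O ports: 0xe9 is the classic Bochs/QEMU debug-console
 * port (also used by VMXAssist), and 0x400-0x403 follow the Bochs ROM
 * BIOS panic/info/debug port conventions. */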
#define VMXASSIST_INFO_PORT   0x0e9
#define ROMBIOS_PANIC_PORT    0x400
#define ROMBIOS_PANIC_PORT2   0x401
#define ROMBIOS_INFO_PORT     0x402
#define ROMBIOS_DEBUG_PORT    0x403
static uint_t GetLinearIP(struct VM * vm) {
    if (vm->state == VM_VMXASSIST_V8086_BIOS || vm->state == VM_VMXASSIST_V8086) { 
        return vm->vmcs.guestStateArea.cs.baseAddr + vm->vmcs.guestStateArea.rip;
    } else {
        return vm->vmcs.guestStateArea.rip;
    }
}
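/* Example (using the values set up for v8086 BIOS boot later in this
 * file): with cs.baseAddr = 0xf0000 and rip = 0xfff0, the linear IP is
 * 0xf0000 + 0xfff0 = 0xffff0, the real-mode BIOS entry point. */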
#define INSTR_OFFSET_START 17
#define NOP_SEQ_LEN        10
#define INSTR_OFFSET_END   (INSTR_OFFSET_START + NOP_SEQ_LEN - 1)
#define TEMPLATE_CODE_LEN  35
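/* Layout implied by these constants (a reading of the numbers, not
 * verified against the template itself): bytes 0-16 of the 35-byte
 * template are a fixed prologue, bytes 17-26 are a 10-byte NOP window
 * -- presumably where an instruction gets patched in -- and the rest
 * is the epilogue. */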
extern uint_t VMCS_LAUNCH();
extern uint_t Init_VMCS_HostState();
extern uint_t Init_VMCS_GuestState();
void DecodeCurrentInstruction(struct VM * vm, struct Instruction * inst)
{
    // this is a gruesome hack
    uint_t address = GetLinearIP(vm);
    uint_t length = vm->vmcs.exitInfoFields.instrLength;
    unsigned char * t = (unsigned char *)address;

    PrintTrace("DecodeCurrentInstruction: instruction is\n");
    PrintTraceMemDump(t, length);

    if ((length == 3) && (t[0] == 0x0f) && (t[1] == 0x22) && (t[2] == 0xc0)) {
        // mov %eax, %cr0 (0f 22 c0) -- typically used to toggle CR0.PE
        // and signal a mode switch
        inst->type = VM_MOV_TO_CR0;
        inst->address = address;
        inst->input1 = vm->registers.eax;
        inst->input2 = vm->vmcs.guestStateArea.cr0;
        inst->output = vm->registers.eax;
        PrintTrace("MOV FROM EAX TO CR0\n");
    } else {
        inst->type = VM_UNKNOWN_INST;
    }
}
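/* A minimal caller sketch (hypothetical -- the real exit handler lives
 * elsewhere), using only fields already referenced in this file:
 *
 *   struct Instruction inst;
 *   DecodeCurrentInstruction(vm, &inst);
 *   if (inst.type == VM_MOV_TO_CR0) {
 *       vm->vmcs.guestStateArea.cr0 = inst.input1;  // emulate the write
 *       vm->vmcs.guestStateArea.rip += vm->vmcs.exitInfoFields.instrLength;
 *   }
 */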
static void ConfigureExits(struct VM * vm)
{
    CopyOutVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));

    vm->vmcs.execCtrlFields.pinCtrls |= 0
        // EXTERNAL_INTERRUPT_EXITING 
        ;

    vm->vmcs.execCtrlFields.procCtrls |= 0
        // INTERRUPT_WINDOWS_EXIT 
        | UNCONDITION_IO_EXITING
        ;

    CopyInVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));

    CopyOutVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));

    vm->vmcs.exitCtrlFields.exitCtrls |= ACK_IRQ_ON_EXIT;

    CopyInVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));

    /* The direct read-modify-write equivalent of the exit-control update: */
    /* VMCS_READ(VM_EXIT_CTRLS, &flags); */
    /* flags |= ACK_IRQ_ON_EXIT; */
    /* VMCS_WRITE(VM_EXIT_CTRLS, &flags); */
}
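/* Note: VMREAD/VMWRITE (and therefore the CopyIn/CopyOut helpers above)
 * operate on the *current* VMCS, so ConfigureExits must run only after
 * the VMCS has been made current with VMPTRLD -- as in MyLaunch() below,
 * which loads the VMCS before writing any control fields. */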
extern int SAFE_VM_LAUNCH();
int MyLaunch(struct VM * vm)
{
    ullong_t vmcs = (ullong_t)((uint_t)(vm->vmcsregion));
    uint_t entry_eip = vm->descriptor.entry_ip;
    uint_t exit_eip = vm->descriptor.exit_eip;
    uint_t guest_esp = vm->descriptor.guest_esp;
    uint_t f = 0xffffffff;
    uint_t tmpReg = 0;
    int ret = 0;
    int vmm_ret = 0;

    PrintTrace("Guest ESP: 0x%x (%u)\n", guest_esp, guest_esp);

    exit_eip = (uint_t)RunVMM;

    PrintTrace("Clear\n");
    VMCS_CLEAR(vmcs);
    PrintTrace("Load\n");
    VMCS_LOAD(vmcs);

    PrintTrace("VMCS_LINK_PTR\n");
    VMCS_WRITE(VMCS_LINK_PTR, &f);
    PrintTrace("VMCS_LINK_PTR_HIGH\n");
    VMCS_WRITE(VMCS_LINK_PTR_HIGH, &f);
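    /* Per the Intel SDM, the VMCS link pointer must be all ones when
     * VMCS shadowing is not in use, which is why both halves of the
     * field are written with f = 0xffffffff above. */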
    SetCtrlBitsCorrectly(IA32_VMX_PINBASED_CTLS_MSR, PIN_VM_EXEC_CTRLS);
    SetCtrlBitsCorrectly(IA32_VMX_PROCBASED_CTLS_MSR, PROC_VM_EXEC_CTRLS);
    SetCtrlBitsCorrectly(IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CTRLS);
    SetCtrlBitsCorrectly(IA32_VMX_ENTRY_CTLS_MSR, VM_ENTRY_CTRLS);

    //SetCtrlBitsCorrectly(IA32_something, GUEST_IA32_DEBUGCTL);
    //SetCtrlBitsCorrectly(IA32_something, GUEST_IA32_DEBUGCTL_HIGH);

    PrintTrace("Setting up host state\n");
    SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, HOST_CR0);
    SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, HOST_CR4);
    ret = Init_VMCS_HostState();

    if (ret != VMX_SUCCESS) {
        if (ret == VMX_FAIL_VALID) {
            PrintTrace("Init Host state: VMCS FAILED WITH ERROR\n");
        } else {
            PrintTrace("Init Host state: Invalid VMCS\n");
        }
        return -1;
    }

    // PrintTrace("HOST_RIP: %x (%u)\n", exit_eip, exit_eip);
    VMCS_WRITE(HOST_RIP, &exit_eip);
    PrintTrace("Setting up guest state\n");
    PrintTrace("GUEST_RIP: %x (%u)\n", entry_eip, entry_eip);
    VMCS_WRITE(GUEST_RIP, &entry_eip);

    SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, GUEST_CR0);
    SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, GUEST_CR4);
    ret = Init_VMCS_GuestState();

    PrintTrace("InitGuestState returned\n");

    if (ret != VMX_SUCCESS) {
        if (ret == VMX_FAIL_VALID) {
            PrintTrace("Init Guest state: VMCS FAILED WITH ERROR\n");
        } else {
            PrintTrace("Init Guest state: Invalid VMCS\n");
        }
        return -1;
    }

    PrintTrace("GUEST_RSP: %x (%u)\n", guest_esp, (uint_t)guest_esp);
    VMCS_WRITE(GUEST_RSP, &guest_esp);

    if (VMCS_WRITE(EXCEPTION_BITMAP, &tmpReg) != VMX_SUCCESS) {
        PrintInfo("Bitmap error\n");
    }

    ConfigureExits(vm);

    PrintTrace("VMCS_LAUNCH\n");

    vm->state = VM_VMXASSIST_STARTUP;

    vmm_ret = SAFE_VM_LAUNCH();

    PrintTrace("VMM error %d\n", vmm_ret);

    return vmm_ret;
}
int VMLaunch(struct VMDescriptor * vm)
{
    VMCS * vmcs = CreateVMCS();
    int rc = 0;

    ullong_t vmcs_ptr = (ullong_t)((uint_t)vmcs);
    uint_t top = (vmcs_ptr >> 32) & 0xffffffff;
    uint_t bottom = (vmcs_ptr) & 0xffffffff;

    theVM.vmcsregion = vmcs;
    theVM.descriptor = *vm;

    PrintTrace("vmcs_ptr_top=%x vmcs_ptr_bottom=%x, eip=%x\n", top, bottom, vm->entry_ip);
    rc = MyLaunch(&theVM); // vmcs_ptr, vm->entry_ip, vm->exit_eip, vm->guest_esp);
    PrintTrace("Returned from MyLaunch();\n");

    return rc;
}
static int update_vmcs_host_state(struct guest_info * info) {
    addr_t tmp;
    struct {
        uint16_t limit;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;
    struct v3_msr tmp_msr;

    __asm__ __volatile__ ( "movq %%cr0, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_CR0, tmp);

    __asm__ __volatile__ ( "movq %%cr3, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_CR3, tmp);

    __asm__ __volatile__ ( "movq %%cr4, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_CR4, tmp);

    __asm__ __volatile__ ( "sgdt (%0); " : : "q"(&tmp_seg) : "memory" );
    vmcs_write(VMCS_HOST_GDTR_BASE, tmp_seg.base);

    __asm__ __volatile__ ( "sidt (%0); " : : "q"(&tmp_seg) : "memory" );
    vmcs_write(VMCS_HOST_IDTR_BASE, tmp_seg.base);

    /* How do we handle this...?  STR only yields the selector:
    __asm__ __volatile__ ( "str (%0); " : : "q"(&tmp_seg) : "memory" );
    vmcs_write(VMCS_HOST_TR_BASE, tmp_seg.base);
    */
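    /* One possible answer (an unverified sketch): read the TR selector
     * with STR, then pull the base out of the GDT descriptor it indexes.
     * Hypothetical helper, assuming a 32-bit TSS descriptor layout:
     *
     *   static addr_t get_host_tr_base(void) {
     *       struct { uint16_t limit; addr_t base; } __attribute__((packed)) gdtr;
     *       uint16_t sel;
     *       unsigned char * desc;
     *       __asm__ __volatile__ ( "sgdt (%0); " : : "q"(&gdtr) : "memory" );
     *       __asm__ __volatile__ ( "str %0; " : "=q"(sel) : );
     *       desc = (unsigned char *)(gdtr.base + (sel & ~0x7));
     *       return ((addr_t)desc[2]) | ((addr_t)desc[3] << 8) |
     *              ((addr_t)desc[4] << 16) | ((addr_t)desc[7] << 24);
     *   }
     */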
#define FS_BASE_MSR 0xc0000100    /* IA32_FS_BASE */
#define GS_BASE_MSR 0xc0000101    /* IA32_GS_BASE */

    v3_get_msr(FS_BASE_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmcs_write(VMCS_HOST_FS_BASE, tmp_msr.value);

    v3_get_msr(GS_BASE_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmcs_write(VMCS_HOST_GS_BASE, tmp_msr.value);

    __asm__ __volatile__ ( "movq %%cs, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_CS_SELECTOR, tmp);

    __asm__ __volatile__ ( "movq %%ss, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_SS_SELECTOR, tmp);

    __asm__ __volatile__ ( "movq %%ds, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_DS_SELECTOR, tmp);

    __asm__ __volatile__ ( "movq %%es, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_ES_SELECTOR, tmp);

    __asm__ __volatile__ ( "movq %%fs, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_FS_SELECTOR, tmp);

    __asm__ __volatile__ ( "movq %%gs, %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_GS_SELECTOR, tmp);

    __asm__ __volatile__ ( "str %0; " : "=q"(tmp) : );
    vmcs_write(VMCS_HOST_TR_SELECTOR, tmp);

#define SYSENTER_CS_MSR  0x00000174    /* IA32_SYSENTER_CS */
#define SYSENTER_ESP_MSR 0x00000175    /* IA32_SYSENTER_ESP */
#define SYSENTER_EIP_MSR 0x00000176    /* IA32_SYSENTER_EIP */

    v3_get_msr(SYSENTER_CS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmcs_write(VMCS_HOST_SYSENTER_CS, tmp_msr.value);

    v3_get_msr(SYSENTER_ESP_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmcs_write(VMCS_HOST_SYSENTER_ESP, tmp_msr.value);

    v3_get_msr(SYSENTER_EIP_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmcs_write(VMCS_HOST_SYSENTER_EIP, tmp_msr.value);

    return 0;
}
static addr_t vmxon_ptr_phys;

// For the 32-bit reserved control fields: must-be-one bits are set in the
// low 32 bits of the MSR, and the high 32 bits give the allowed-one mask
static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
    v3_msr_t mask_msr;

    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);

    val |= mask_msr.lo;    // force the must-be-one bits on
    val &= mask_msr.hi;    // clear bits outside the allowed-one mask
    return val;
}
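/* Worked example (illustrative numbers only): if the MSR reads
 * hi = 0xfffffffe and lo = 0x00000016, a requested val of 0 becomes
 * (0 | 0x16) & 0xfffffffe = 0x16 -- the must-be-one bits are forced on,
 * and bit 0, which is not allowed to be one, stays clear. */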
static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
    v3_msr_t msr0, msr1;
    addr_t msr0_val, msr1_val;

    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);

    v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
    v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);

    // This generates a mask that is the natural bit width of the CPU
    msr0_val = msr0.value;
    msr1_val = msr1.value;

    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);

    val |= msr0_val;    // FIXED0-style mask: bits set here must be one
    val &= msr1_val;    // FIXED1-style mask: bits clear here must be zero
    return val;
}
static int setup_base_host_state() {
    // vmwrite(HOST_IDTR_BASE, 

    return 0;
}
static void setup_v8086_mode_for_boot(struct guest_info * vm_info)
{
    ((struct vmx_data *)vm_info->vmm_data)->state = VMXASSIST_V8086_BIOS;
    ((struct rflags *)&(vm_info->ctrl_regs.rflags))->vm = 1;
    ((struct rflags *)&(vm_info->ctrl_regs.rflags))->iopl = 3;

    vm_info->rip = 0xfff0;

    vm_info->segments.cs.selector = 0xf000;
    vm_info->segments.cs.base = 0xf000 << 4;
    vm_info->segments.cs.limit = 0xffff;
    vm_info->segments.cs.type = 3;
    vm_info->segments.cs.system = 1;
    vm_info->segments.cs.dpl = 3;
    vm_info->segments.cs.present = 1;
    vm_info->segments.cs.granularity = 0;
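    /* v8086 code always executes at CPL 3 -- hence dpl = 3 on the
     * segments and iopl = 3 above, so the BIOS can issue IN/OUT without
     * faulting.  CS:IP of f000:fff0 (linear 0xffff0) is the traditional
     * real-mode BIOS entry point. */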
    int i;
    struct v3_segment * seg_ptr = (struct v3_segment *)&(vm_info->segments);

    /* Set values for selectors ds through ss */
    for (i = 1; i < 6; i++) {
        seg_ptr[i].selector = 0x0000;
        seg_ptr[i].base = 0x00000;
        seg_ptr[i].system = 1;
        seg_ptr[i].present = 1;
        seg_ptr[i].granularity = 0;
    }
}
static addr_t allocate_vmcs() 
{
    reg_ex_t msr;

    PrintDebug("Allocating page\n");
    struct vmcs_data * vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));

    memset(vmcs_page, 0, 4096);

    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));

    vmcs_page->revision = ((struct vmx_basic_msr *)&msr)->revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}
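/* The revision identifier written above comes from bits 30:0 of
 * IA32_VMX_BASIC; the CPU rejects VMPTRLD/VMXON on a region whose first
 * 31 bits don't match it.  That is why both the VMCS and the VMXON
 * region (see v3_init_vmx below) are allocated through this helper. */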
static void init_vmcs_bios(struct guest_info * vm_info) 
{
    setup_v8086_mode_for_boot(vm_info);

    // TODO: Fix vmcs fields so they're 32-bit
}
static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
    PrintDebug("Entering init_vmx_guest\n");

    v3_pre_config_guest(info, config_ptr);

    struct vmx_data * data;

    data = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    PrintDebug("vmx_data pointer: %p\n", (void *)data);

    PrintDebug("Allocating VMCS\n");
    data->vmcs_ptr_phys = allocate_vmcs();
    PrintDebug("VMCS pointer: %p\n", (void *)data->vmcs_ptr_phys);

    info->vmm_data = (void *)data;

    PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
    init_vmcs_bios(info);

    // v3_post_config_guest(info, config_ptr);

    return 0;
}
static int inline check_vmcs_write(vmcs_field_t field, addr_t val)
{
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}
static void inline translate_segment_access(struct v3_segment * v3_seg,  
                                            struct vmcs_segment_access * access)
{
    access->type = v3_seg->type;
    access->desc_type = v3_seg->system;
    access->dpl = v3_seg->dpl;
    access->present = v3_seg->present;
    access->avail = v3_seg->avail;
    access->long_mode = v3_seg->long_mode;
    access->db = v3_seg->db;
    access->granularity = v3_seg->granularity;
}
static int inline vmcs_write_guest_segments(struct guest_info * info)
{
    int ret = 0;
    struct vmcs_segment_access access;

    /* CS Segment */
    translate_segment_access(&(info->segments.cs), &access);

    ret &= check_vmcs_write(VMCS_GUEST_CS_BASE, info->segments.cs.base);
    ret &= check_vmcs_write(VMCS_GUEST_CS_SELECTOR, info->segments.cs.selector);
    ret &= check_vmcs_write(VMCS_GUEST_CS_LIMIT, info->segments.cs.limit);
    ret &= check_vmcs_write(VMCS_GUEST_CS_ACCESS, access.value);

    /* SS Segment */
    translate_segment_access(&(info->segments.ss), &access);

    ret &= check_vmcs_write(VMCS_GUEST_SS_BASE, info->segments.ss.base);
    ret &= check_vmcs_write(VMCS_GUEST_SS_SELECTOR, info->segments.ss.selector);
    ret &= check_vmcs_write(VMCS_GUEST_SS_LIMIT, info->segments.ss.limit);
    ret &= check_vmcs_write(VMCS_GUEST_SS_ACCESS, access.value);

    /* DS Segment */
    translate_segment_access(&(info->segments.ds), &access);

    ret &= check_vmcs_write(VMCS_GUEST_DS_BASE, info->segments.ds.base);
    ret &= check_vmcs_write(VMCS_GUEST_DS_SELECTOR, info->segments.ds.selector);
    ret &= check_vmcs_write(VMCS_GUEST_DS_LIMIT, info->segments.ds.limit);
    ret &= check_vmcs_write(VMCS_GUEST_DS_ACCESS, access.value);

    /* ES Segment */
    translate_segment_access(&(info->segments.es), &access);

    ret &= check_vmcs_write(VMCS_GUEST_ES_BASE, info->segments.es.base);
    ret &= check_vmcs_write(VMCS_GUEST_ES_SELECTOR, info->segments.es.selector);
    ret &= check_vmcs_write(VMCS_GUEST_ES_LIMIT, info->segments.es.limit);
    ret &= check_vmcs_write(VMCS_GUEST_ES_ACCESS, access.value);

    /* FS Segment */
    translate_segment_access(&(info->segments.fs), &access);

    ret &= check_vmcs_write(VMCS_GUEST_FS_BASE, info->segments.fs.base);
    ret &= check_vmcs_write(VMCS_GUEST_FS_SELECTOR, info->segments.fs.selector);
    ret &= check_vmcs_write(VMCS_GUEST_FS_LIMIT, info->segments.fs.limit);
    ret &= check_vmcs_write(VMCS_GUEST_FS_ACCESS, access.value);

    /* GS Segment */
    translate_segment_access(&(info->segments.gs), &access);

    ret &= check_vmcs_write(VMCS_GUEST_GS_BASE, info->segments.gs.base);
    ret &= check_vmcs_write(VMCS_GUEST_GS_SELECTOR, info->segments.gs.selector);
    ret &= check_vmcs_write(VMCS_GUEST_GS_LIMIT, info->segments.gs.limit);
    ret &= check_vmcs_write(VMCS_GUEST_GS_ACCESS, access.value);

    return ret;
}
static int start_vmx_guest(struct guest_info * info) {
    struct vmx_data * vmx_data = (struct vmx_data *)info->vmm_data;
    int vmx_ret = 0;

    // Have to do a whole lot of flag setting here
    PrintDebug("Clearing VMCS\n");
    vmx_ret = vmcs_clear(vmx_data->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1;
    }

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_data->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }

    update_vmcs_host_state(info);

    // TODO: This is not 32-bit safe!
    vmx_ret &= check_vmcs_write(VMCS_GUEST_RIP, info->rip);
    vmx_ret &= vmcs_write_guest_segments(info);

    if (vmx_ret != 0) {
        PrintError("Could not initialize VMCS segments\n");
        return -1;
    }

    v3_print_vmcs_guest_state();

    return 0;
}
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: %p\n", (void *)ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintTrace("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }
    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}
static int has_vmx_nested_paging() {
    // No EPT support yet
    return 0;
}
void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
    extern v3_cpu_arch_t v3_cpu_type;

    __asm__ __volatile__ (
        "movq %%cr4, %%rbx; "
        "orq  $0x00002000, %%rbx; "    // set CR4.VMXE (bit 13) to allow VMXON
        "movq %%rbx, %%cr4; "
        : : : "%rbx"
    );

    // Should check and return Error here....
    __asm__ __volatile__ (
        "movq %%cr0, %%rbx; "
        "orq  $0x00000020, %%rbx; "    // set CR0.NE (bit 5), required for VMX
        "movq %%rbx, %%cr0; "
        : : : "%rbx"
    );

    // Setup VMXON Region
    vmxon_ptr_phys = allocate_vmcs();
    PrintDebug("VMXON pointer: 0x%p\n", (void *)vmxon_ptr_phys);

    if (v3_enable_vmx(vmxon_ptr_phys) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }

    if (has_vmx_nested_paging() == 1) {
        v3_cpu_type = V3_VMX_EPT_CPU;
    } else {
        v3_cpu_type = V3_VMX_CPU;
    }

    // Setup the VMX specific vmm operations
    vm_ops->init_guest = &init_vmx_guest;
    vm_ops->start_guest = &start_vmx_guest;
    vm_ops->has_nested_paging = &has_vmx_nested_paging;
}
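/* Hypothetical wiring sketch (caller names assumed, not from this file):
 * the architecture-independent VMM calls v3_init_vmx() once at boot and
 * then drives a guest through the ops table it filled in:
 *
 *   struct v3_ctrl_ops ops;
 *   v3_init_vmx(&ops);
 *   ops.init_guest(info, config_ptr);   // allocate + initialize the VMCS
 *   ops.start_guest(info);              // VMCLEAR/VMPTRLD and launch
 */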