2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
11 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
13 * All rights reserved.
15 * Author: Peter Dinda <pdinda@northwestern.edu>
16 * Jack Lange <jarusl@cs.northwestern.edu>
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmx.h>
24 #include <palacios/vmcs.h>
25 #include <palacios/vmm.h>
36 #include <palacios/vmm_util.h>
37 #include <palacios/vmm_string.h>
/* VM launch entry point defined outside this file; takes the VMCS pointer and the initial EIP. */
41 extern int Launch_VM(ullong_t vmcsPtr, uint_t eip);
/* NOTE(review): 65536 == 2^16, presumably the size of the x86 I/O port space -- confirm at the use site. */
43 #define NUMPORTS 65536
/*
 * I/O port numbers.  NOTE(review): the purposes (VMXAssist info output,
 * ROMBIOS panic/info/debug output) are inferred from the macro names only;
 * none of the code visible here uses them.
 */
46 #define VMXASSIST_INFO_PORT 0x0e9
47 #define ROMBIOS_PANIC_PORT 0x400
48 #define ROMBIOS_PANIC_PORT2 0x401
49 #define ROMBIOS_INFO_PORT 0x402
50 #define ROMBIOS_DEBUG_PORT 0x403
/* The single VM instance of this file; VMLaunch() fills in its vmcsregion and descriptor before calling MyLaunch(). */
53 static struct VM theVM;
55 static uint_t GetLinearIP(struct VM * vm) {
56 if (vm->state == VM_VMXASSIST_V8086_BIOS || vm->state == VM_VMXASSIST_V8086) {
57 return vm->vmcs.guestStateArea.cs.baseAddr + vm->vmcs.guestStateArea.rip;
59 return vm->vmcs.guestStateArea.rip;
/*
 * Layout constants for a code template: a NOP_SEQ_LEN-byte window that
 * starts at byte INSTR_OFFSET_START and ends (inclusive) at
 * INSTR_OFFSET_END, inside a template of TEMPLATE_CODE_LEN bytes.
 * NOTE(review): no user of these macros is visible in this part of the
 * file; the "NOP sequence inside a template" reading comes from the names.
 */
67 #define INSTR_OFFSET_START 17
68 #define NOP_SEQ_LEN 10
69 #define INSTR_OFFSET_END (INSTR_OFFSET_START + NOP_SEQ_LEN - 1)
70 #define TEMPLATE_CODE_LEN 35
/*
 * Low-level VMCS helpers implemented outside this file.  They are declared
 * with empty parameter lists, so the compiler does not check the arguments
 * passed at the call sites.
 * NOTE(review): presumably thin wrappers around the corresponding VMX
 * instructions -- confirm against their definitions.
 */
80 extern uint_t VMCS_CLEAR();
81 extern uint_t VMCS_LOAD();
82 extern uint_t VMCS_STORE();
83 extern uint_t VMCS_LAUNCH();
84 extern uint_t VMCS_RESUME();
/* Host / guest state initializers; MyLaunch() compares their results against VMX_SUCCESS and VMX_FAIL_VALID. */
85 extern uint_t Init_VMCS_HostState();
86 extern uint_t Init_VMCS_GuestState();
/*
 * Classify the instruction the guest was executing at the time of the exit.
 *
 * Reads exitInfoFields.instrLength bytes starting at the guest's linear IP
 * (see GetLinearIP) and fills in the caller-supplied Instruction record.
 * Only the three-byte sequence 0f 22 c0 is recognized; it is reported as
 * VM_MOV_TO_CR0 together with the instruction address, the guest EAX and
 * the guest CR0 from the cached VMCS.  The remaining visible assignment
 * marks the instruction as VM_UNKNOWN_INST.
 */
98 void DecodeCurrentInstruction(struct VM *vm, struct Instruction *inst)
100 // this is a gruesome hack
101 uint_t address = GetLinearIP(vm);
102 uint_t length = vm->vmcs.exitInfoFields.instrLength;
// NOTE(review): the guest linear address is used directly as a host pointer;
// this presumably relies on guest memory being identity-mapped -- confirm.
103 unsigned char *t = (unsigned char *) address;
107 PrintTrace("DecodeCurrentInstruction: instruction is\n");
108 PrintTraceMemDump(t,length);
// Exact-length, exact-bytes match: no prefixes or other encodings are handled.
110 if (length==3 && t[0]==0x0f && t[1]==0x22 && t[2]==0xc0) {
111 // mov from eax to cr0
112 // usually used to signal
113 inst->type=VM_MOV_TO_CR0;
114 inst->address=address;
// input1 = value being written (EAX), input2 = current guest CR0, output = EAX.
116 inst->input1=vm->registers.eax;
117 inst->input2=vm->vmcs.guestStateArea.cr0;
118 inst->output=vm->registers.eax;
119 PrintTrace("MOV FROM EAX TO CR0\n");
121 inst->type=VM_UNKNOWN_INST;
126 static void V8086ModeSegmentRegisterFixup(struct VM *vm)
128 vm->vmcs.guestStateArea.cs.baseAddr = vm->vmcs.guestStateArea.cs.selector << 4;
129 vm->vmcs.guestStateArea.es.baseAddr = vm->vmcs.guestStateArea.es.selector << 4;
130 vm->vmcs.guestStateArea.ss.baseAddr = vm->vmcs.guestStateArea.ss.selector << 4;
131 vm->vmcs.guestStateArea.ds.baseAddr = vm->vmcs.guestStateArea.ds.selector << 4;
132 vm->vmcs.guestStateArea.fs.baseAddr = vm->vmcs.guestStateArea.fs.selector << 4;
133 vm->vmcs.guestStateArea.gs.baseAddr = vm->vmcs.guestStateArea.gs.selector << 4;
136 static void SetupV8086ModeForBoot(struct VM *vm)
138 vm->state = VM_VMXASSIST_V8086_BIOS;
140 // Put guest into V8086 mode on return
141 vm->vmcs.guestStateArea.rflags |= EFLAGS_VM | EFLAGS_IOPL_HI | EFLAGS_IOPL_LO ;
143 // We will start at f000:fff0 on return
145 // We want this to look as much as possible as a processor
147 vm->vmcs.guestStateArea.rip = 0xfff0; // note, 16 bit rip
148 vm->vmcs.guestStateArea.cs.selector = 0xf000;
149 vm->vmcs.guestStateArea.cs.limit = 0xffff;
150 vm->vmcs.guestStateArea.cs.access.as_dword = 0xf3;
152 vm->vmcs.guestStateArea.ss.selector = 0x0000;
153 vm->vmcs.guestStateArea.ss.limit = 0xffff;
154 vm->vmcs.guestStateArea.ss.access.as_dword = 0xf3;
156 vm->vmcs.guestStateArea.ds.selector = 0x0000;
157 vm->vmcs.guestStateArea.ds.limit = 0xffff;
158 vm->vmcs.guestStateArea.ds.access.as_dword = 0xf3;
160 vm->vmcs.guestStateArea.es.selector = 0x0000;
161 vm->vmcs.guestStateArea.es.limit = 0xffff;
162 vm->vmcs.guestStateArea.es.access.as_dword = 0xf3;
164 vm->vmcs.guestStateArea.fs.selector = 0x0000;
165 vm->vmcs.guestStateArea.fs.limit = 0xffff;
166 vm->vmcs.guestStateArea.fs.access.as_dword = 0xf3;
168 vm->vmcs.guestStateArea.gs.selector = 0x0000;
169 vm->vmcs.guestStateArea.gs.limit = 0xffff;
170 vm->vmcs.guestStateArea.gs.access.as_dword = 0xf3;
172 V8086ModeSegmentRegisterFixup(vm);
174 PrintTrace_VMCSData(&(vm->vmcs));
/*
 * Adjust the VM-execution and VM-exit controls of the VM.
 *
 * Pattern used twice: copy the control fields out into vm->vmcs, OR in the
 * wanted flags, and copy them back in.  Of the flags visible here, only
 * UNCONDITION_IO_EXITING (processor controls) and ACK_IRQ_ON_EXIT (exit
 * controls) are actually enabled; the others are commented out.
 */
181 static void ConfigureExits(struct VM *vm)
183 CopyOutVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
// "|= 0" keeps the expression valid when every flag below is commented out.
185 vm->vmcs.execCtrlFields.pinCtrls |= 0
186 // EXTERNAL_INTERRUPT_EXITING
188 vm->vmcs.execCtrlFields.procCtrls |= 0
189 // INTERRUPT_WINDOWS_EXIT
197 | UNCONDITION_IO_EXITING
201 CopyInVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
203 CopyOutVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
205 vm->vmcs.exitCtrlFields.exitCtrls |= ACK_IRQ_ON_EXIT;
207 CopyInVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
// Earlier direct read-modify-write of VM_EXIT_CTRLS, kept for reference only.
210 /* VMCS_READ(VM_EXIT_CTRLS, &flags); */
211 /* flags |= ACK_IRQ_ON_EXIT; */
212 /* VMCS_WRITE(VM_EXIT_CTRLS, &flags); */
/* Launch routine defined outside this file; its result is stored in vmm_ret by MyLaunch(). */
217 extern int SAFE_VM_LAUNCH();
/*
 * Program the VMCS for the given VM and launch it.
 *
 * Visible sequence: write the VMCS link pointer, fix up the control fields
 * and the host/guest CR0/CR4 from the capability MSRs, initialize host and
 * guest state, write HOST_RIP / GUEST_RIP / GUEST_RSP and the exception
 * bitmap, mark the VM as VM_VMXASSIST_STARTUP and call SAFE_VM_LAUNCH().
 */
219 int MyLaunch(struct VM *vm)
// NOTE(review): the region address is narrowed through uint_t before being
// widened to ullong_t, so any address bits above uint_t's width are lost.
221 ullong_t vmcs = (ullong_t)((uint_t) (vm->vmcsregion));
222 uint_t entry_eip = vm->descriptor.entry_ip;
223 uint_t exit_eip = vm->descriptor.exit_eip;
224 uint_t guest_esp = vm->descriptor.guest_esp;
// All-ones value written to both halves of the VMCS link pointer below.
225 uint_t f = 0xffffffff;
230 PrintTrace("Guest ESP: 0x%x (%u)\n", guest_esp, guest_esp);
// The descriptor's exit_eip is overridden: the host resumes in RunVMM.
232 exit_eip = (uint_t)RunVMM;
234 PrintTrace("Clear\n");
236 PrintTrace("Load\n");
240 PrintTrace("VMCS_LINK_PTR\n");
241 VMCS_WRITE(VMCS_LINK_PTR, &f);
242 PrintTrace("VMCS_LINK_PTR_HIGH\n");
243 VMCS_WRITE(VMCS_LINK_PTR_HIGH, &f);
// One call per control field, each paired with its capability MSR.
246 SetCtrlBitsCorrectly(IA32_VMX_PINBASED_CTLS_MSR, PIN_VM_EXEC_CTRLS);
247 SetCtrlBitsCorrectly(IA32_VMX_PROCBASED_CTLS_MSR, PROC_VM_EXEC_CTRLS);
248 SetCtrlBitsCorrectly(IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CTRLS);
249 SetCtrlBitsCorrectly(IA32_VMX_ENTRY_CTLS_MSR, VM_ENTRY_CTRLS);
253 //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL);
254 //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL_HIGH);
258 PrintTrace("Setting up host state\n");
// CR0 / CR4 are each fixed up from their FIXED0 / FIXED1 MSR pair.
259 SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, HOST_CR0);
260 SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, HOST_CR4);
261 ret = Init_VMCS_HostState();
// Failure reporting distinguishes VMX_FAIL_VALID from any other non-success result.
263 if (ret != VMX_SUCCESS) {
264 if (ret == VMX_FAIL_VALID) {
265 PrintTrace("Init Host state: VMCS FAILED WITH ERROR\n");
267 PrintTrace("Init Host state: Invalid VMCS\n");
272 // PrintTrace("HOST_RIP: %x (%u)\n", exit_eip, exit_eip);
273 VMCS_WRITE(HOST_RIP, &exit_eip);
276 PrintTrace("Setting up guest state\n");
277 PrintTrace("GUEST_RIP: %x (%u)\n", entry_eip, entry_eip);
278 VMCS_WRITE(GUEST_RIP, &entry_eip);
280 SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, GUEST_CR0);
281 SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, GUEST_CR4);
282 ret = Init_VMCS_GuestState();
284 PrintTrace("InitGuestState returned\n");
286 if (ret != VMX_SUCCESS) {
287 if (ret == VMX_FAIL_VALID) {
288 PrintTrace("Init Guest state: VMCS FAILED WITH ERROR\n");
290 PrintTrace("Init Guest state: Invalid VMCS\n");
294 PrintTrace("GUEST_RSP: %x (%u)\n", guest_esp, (uint_t)guest_esp);
295 VMCS_WRITE(GUEST_RSP, &guest_esp);
// This is the only VMCS_WRITE in the function whose result is checked.
299 if (VMCS_WRITE(EXCEPTION_BITMAP, &tmpReg) != VMX_SUCCESS) {
300 PrintInfo("Bitmap error\n");
305 PrintTrace("VMCS_LAUNCH\n");
307 vm->state=VM_VMXASSIST_STARTUP;
309 vmm_ret = SAFE_VM_LAUNCH();
311 PrintTrace("VMM error %d\n", vmm_ret);
/*
 * Create a VMCS, record it and a copy of the caller's descriptor in the
 * file-level VM instance (theVM), and hand that instance to MyLaunch().
 */
319 int VMLaunch(struct VMDescriptor *vm)
321 VMCS * vmcs = CreateVMCS();
// NOTE(review): the pointer is narrowed through uint_t first, so if uint_t
// is 32 bits wide `top` below is always 0 -- confirm this is intended.
324 ullong_t vmcs_ptr = (ullong_t)((uint_t)vmcs);
325 uint_t top = (vmcs_ptr >> 32) & 0xffffffff;
326 uint_t bottom = (vmcs_ptr) & 0xffffffff;
328 theVM.vmcsregion = vmcs;
// Struct copy: later changes to the caller's descriptor do not affect theVM.
329 theVM.descriptor = *vm;
// top / bottom are only used for this trace message.
331 PrintTrace("vmcs_ptr_top=%x vmcs_ptr_bottom=%x, eip=%x\n", top, bottom, vm->entry_ip);
332 rc = MyLaunch(&theVM); // vmcs_ptr, vm->entry_ip, vm->exit_eip, vm->guest_esp);
333 PrintTrace("Returned from MyLaunch();\n");
347 // For the 32 bit reserved bit fields
348 // MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
// (MB1 = "must be one", MBZ = "must be zero".)
// Reads the capability MSR `msr_num` into mask_msr so that `val` can be
// adjusted against it.
349 static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
352 PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
354 v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
// NOTE(review): the MSR was read into mask_msr above, but this trace prints
// msr.hi / msr.lo -- this looks like it should be mask_msr; confirm that a
// variable named `msr` exists at all.
356 PrintDebug("MSR %x = %x : %x \n", msr_num, msr.hi, msr.lo);
// Two-MSR counterpart of sanitize_bits1: reads msr_num0 and msr_num1 and
// takes their full values (as addr_t) for use against `val`.
365 static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
367 addr_t msr0_val, msr1_val;
369 PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
371 v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
372 v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
374 // This generates a mask that is the natural bit width of the CPU
375 msr0_val = msr0.value;
376 msr1_val = msr1.value;
// NOTE(review): %p is given addr_t arguments here rather than pointers; if
// PrintDebug follows printf conventions they need a (void *) cast.
378 PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, msr0_val, msr_num1, msr1_val);
388 static vmcs_t * allocate_vmcs() {
390 vmcs_t * vmcs_page = (vmcs_t *)V3_VAddr(V3_AllocPages(1));
392 memset(vmcb_page, 0, 4096);
394 v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
396 *(uint32_t *)vmcs_page = ((struct vmx_basic_msr *)msr.val)->revision;
/*
 * Initialize `vmcs` for the guest described by `vm_info`.  Called from
 * init_vmx_guest() right after the VMCS has been allocated.
 * NOTE(review): the "bios" in the name suggests a BIOS-boot guest state;
 * the body is not visible here, so confirm what it programs.
 */
403 static void init_vmcs_bios(vmcs_t * vmcs, struct guest_info * vm_info) {
409 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
410 v3_pre_config_guest(info, config_ptr);
412 PrintDebug("Allocating VMCS\n");
413 info->vmm_data = (void *)allocate_vmcs();
415 PrintDebug("Initializing VMCS (addr=%p)\n", (void *)info->vmm_data);
416 init_vmcs_bios((vmcs_t *)(info->vmm_data), info);
418 v3_post_config_guest(info, config_ptr);
/*
 * Guest start routine for this file.
 * NOTE(review): the name says "svm", but this is the VMX file and
 * v3_init_vmx() installs `&start_vmx_guest` as the start_guest hook; no
 * function of that name is defined in the visible code.  One of the two
 * names presumably needs to change -- confirm.
 */
426 static int start_svm_guest(struct guest_info *info) {
435 int v3_is_vmx_capable() {
437 v3_msr_t feature_msr;
438 addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;
440 v3_cpuid(CPUID_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
442 if (ecx & CPUID_1_ECX_VTXFLAG) {
443 v3_get_msr(IA32_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
445 PrintTrace("MSRREGlow: 0x%.8x\n", feature_msr.lo);
447 if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
448 PrintDebug("VMX is locked -- enable in the BIOS\n");
453 PrintDebug("VMX not supported on this cpu\n");
/*
 * Nested-paging query.  v3_init_vmx() selects V3_VMX_EPT_CPU when this
 * returns 1 and V3_VMX_CPU otherwise, and installs this function as the
 * has_nested_paging hook.
 */
460 static int has_vmx_nested_paging() {
466 // We set up the global host state that is unlikely to change across processes here
467 // Segment Descriptors mainly
/* NOTE(review): presumably a segment/table descriptor layout used while setting up host state; members are not visible here -- confirm. */
469 struct seg_descriptor {
/*
 * Intended to program the host-state fields of the VMCS.
 * NOTE(review): the only visible content is a commented-out, unfinished
 * vmwrite of HOST_IDTR_BASE, so this looks like a stub -- confirm.
 */
474 static int setup_base_host_state() {
479 // vmwrite(HOST_IDTR_BASE,
486 void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
489 // Setup the host state save area
490 void * host_state = V3_AllocPages(1);
492 v3_get_msr(VMX_BASIC_MSR, &(basic_msr.hi), &(basic_msr.lo));
494 *(uint32_t *)host_state = ((struct vmx_basic_msr *)basic_msr.value)->revision;
496 PrintDebug("VMX revision: 0x%p\n", host_state);
498 __asm__ __volatile__ (
499 "movl %%cr4, %%ebx; "
500 "orl %%ebx, 0x00002000; "
506 // Should check and return Error here....
507 __asm__ __volatile__ (
508 "movl %%cr0, %%ebx; "
509 "orl %%ebx, 0x00000020; "
514 if (v3_enable_vmx(host_state) == 0) {
515 PrintDebug("VMX Enabled\n");
517 PrintError("VMX initialization failure\n");
522 if (has_vmx_nested_paging() == 1) {
523 v3_cpu_type = V3_VMX_EPT_CPU;
525 v3_cpu_type = V3_VMX_CPU;
528 // Setup the VMX specific vmm operations
529 vmm_ops->init_guest = &init_vmx_guest;
530 vmm_ops->start_guest = &start_vmx_guest;
531 vmm_ops->has_nested_paging = &has_vmx_nested_paging;