Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You will probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel

The other branches are checked out in the same way.
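
For example, to track a release branch (the branch name Release-1.3 below is only illustrative; run 'git branch -r' to see which branches actually exist):

  git branch -r
  git checkout --track -b Release-1.3 origin/Release-1.3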


Merge branch 'devel' of /home-remote/palacios/palacios into devel
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c
index 332e6ad..4326788 100644
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vmm_ctrl_regs.h>
 #include <palacios/vmm_config.h>
+#include <palacios/vmm_time.h>
 #include <palacios/vm_guest_mem.h>
 #include <palacios/vmm_direct_paging.h>
 #include <palacios/vmx_io.h>
 #include <palacios/vmx_msr.h>
 
+#include <palacios/vmx_hw_info.h>
 
 #ifndef CONFIG_DEBUG_VMX
 #undef PrintDebug
 #endif
 
 
-static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+/* These fields contain the hardware feature sets supported by the local CPU */
+static struct vmx_hw_info hw_info;
 
 
+static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
 
 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
@@ -50,7 +55,7 @@ extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, str
 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
     int ret = 0;
 
-    ret = vmcs_write(field,val);
+    ret = vmcs_write(field, val);
 
     if (ret != VMX_SUCCESS) {
         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
@@ -72,54 +77,10 @@ static int inline check_vmcs_read(vmcs_field_t field, void * val) {
     return ret;
 }
 
-#if 0
-// For the 32 bit reserved bit fields 
-// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
-static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
-    v3_msr_t mask_msr;
-
-    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
-
-    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
 
-    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);
-
-    val |= mask_msr.lo;
-    val |= mask_msr.hi;
-  
-    return val;
-}
-
-
-
-static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
-    v3_msr_t msr0, msr1;
-    addr_t msr0_val, msr1_val;
-
-    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
-
-    v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
-    v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
-  
-    // This generates a mask that is the natural bit width of the CPU
-    msr0_val = msr0.value;
-    msr1_val = msr1.value;
-
-    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);
-
-    val |= msr0_val;
-    val |= msr1_val;
-
-    return val;
-}
-
-
-
-#endif
 
 
 static addr_t allocate_vmcs() {
-    reg_ex_t msr;
     struct vmcs_data * vmcs_page = NULL;
 
     PrintDebug("Allocating page\n");
@@ -127,10 +88,8 @@ static addr_t allocate_vmcs() {
     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
     memset(vmcs_page, 0, 4096);
 
-    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
-    
-    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
-    PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
+    vmcs_page->revision = hw_info.basic_info.revision;
+    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
 
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
@@ -140,9 +99,15 @@ static addr_t allocate_vmcs() {
 
 static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
     int vmx_ret = 0;
+    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
+
+    // disable global interrupts for vm state initialization
+    v3_disable_ints();
 
     PrintDebug("Loading VMCS\n");
     vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
+    active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+    vmx_state->state = VMX_UNLAUNCHED;
 
     if (vmx_ret != VMX_SUCCESS) {
         PrintError("VMPTRLD failed\n");
@@ -402,8 +367,12 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
 
     info->dbg_regs.dr7 = 0x400;
 
+#ifdef __V3_64BIT__
     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
-    
+#else
+    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
+    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
+#endif
 
     if (v3_update_vmcs_ctrl_fields(info)) {
         PrintError("Could not write control fields!\n");
@@ -416,7 +385,12 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     }
 
 
-    vmx_state->state = VMXASSIST_DISABLED;
+    vmx_state->assist_state = VMXASSIST_DISABLED;
+
+    // reenable global interrupts for vm state initialization now
+    // that the vm state is initialized. If another VM kicks us off, 
+    // it'll update our vmx state so that we know to reload ourself
+    v3_enable_ints();
 
     return 0;
 }
@@ -435,6 +409,7 @@ int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
 
     info->vmm_data = vmx_state;
+    vmx_state->state = VMX_UNLAUNCHED;
 
     PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
     
@@ -459,6 +434,18 @@ int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
     return 0;
 }
 
+
+int v3_deinit_vmx_vmcs(struct guest_info * core) {
+    struct vmx_data * vmx_state = core->vmm_data;
+
+    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
+
+    V3_Free(vmx_state);
+
+    return 0;
+}
+
+
 static int update_irq_exit_state(struct guest_info * info) {
     struct vmx_exit_idt_vec_info idt_vec_info;
 
@@ -642,15 +629,27 @@ int v3_vmx_enter(struct guest_info * info) {
     int ret = 0;
     uint32_t tsc_offset_low, tsc_offset_high;
     struct vmx_exit_info exit_info;
+    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-    // v3_print_guest_state(info);
+    // Perform any additional yielding needed for time adjustment
+    v3_adjust_time(info);
+
+    // Update timer devices prior to entering VM.
+    v3_update_timers(info);
 
     // disable global interrupts for vm state transition
     v3_disable_ints();
 
+
+    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
+       vmcs_load(vmx_info->vmcs_ptr_phys);
+       active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+    }
+
+
     v3_vmx_restore_vmcs(info);
 
 
@@ -668,26 +667,24 @@ int v3_vmx_enter(struct guest_info * info) {
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
     }
 
-    v3_update_timers(info);
-
-    /* If this guest is frequency-lagged behind host time, wait 
-     * for the appropriate host time before resuming the guest. */
-    v3_adjust_time(info);
+    // Perform last-minute time bookkeeping prior to entering the VM
+    v3_time_enter_vm(info);
 
     tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
     tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
-    PrintDebug("Stored 0x%x_%x into vmcs TSC offset.\n", 
-              tsc_offset_high, tsc_offset_low);
-    if (info->vm_info->run_state == VM_STOPPED) {
+
+    if (vmx_info->state == VMX_UNLAUNCHED) {
+       vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
     } else {
+       V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
     }
-
+    
     //  PrintDebug("VMX Exit: ret=%d\n", ret);
 
     if (ret != VMX_SUCCESS) {
@@ -699,6 +696,9 @@ int v3_vmx_enter(struct guest_info * info) {
        return -1;
     }
 
+    // Immediate exit from VM time bookkeeping
+    v3_time_exit_vm(info);
+
     info->num_exits++;
 
     /* Update guest state */
@@ -731,6 +731,17 @@ int v3_vmx_enter(struct guest_info * info) {
     update_irq_exit_state(info);
 #endif
 
+    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
+       // This is a special case whose only job is to inject an interrupt
+       vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
+        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
+        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
+
+#ifdef CONFIG_DEBUG_INTERRUPTS
+        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
+#endif
+    }
+
     // reenable global interrupts after vm exit
     v3_enable_ints();
 
@@ -746,20 +757,54 @@ int v3_vmx_enter(struct guest_info * info) {
 }
 
 
-int v3_start_vmx_guest(struct guest_info* info) {
+int v3_start_vmx_guest(struct guest_info * info) {
+
+    PrintDebug("Starting VMX core %u\n", info->cpu_id);
 
+    if (info->cpu_id == 0) {
+       info->core_run_state = CORE_RUNNING;
+       info->vm_info->run_state = VM_RUNNING;
+    } else {
 
-    PrintDebug("Launching VMX guest\n");
+        PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
+
+        while (info->core_run_state == CORE_STOPPED) {
+            v3_yield(info);
+            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
+        }
+       
+       PrintDebug("VMX core %u initialized\n", info->cpu_id);
+    }
+
+
+    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
+               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
+               info->segments.cs.limit, (void *)(info->rip));
+
+
+    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
 
     v3_start_time(info);
 
     while (1) {
+
+       if (info->vm_info->run_state == VM_STOPPED) {
+           info->core_run_state = CORE_STOPPED;
+           break;
+       }
+
        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }
 
+
+
+       if (info->vm_info->run_state == VM_STOPPED) {
+           info->core_run_state = CORE_STOPPED;
+           break;
+       }
 /*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
@@ -772,6 +817,12 @@ int v3_start_vmx_guest(struct guest_info* info) {
 }
 
 
+
+
+#define VMX_FEATURE_CONTROL_MSR     0x0000003a
+#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
+#define CPUID_1_ECX_VTXFLAG 0x00000020
+
 int v3_is_vmx_capable() {
     v3_msr_t feature_msr;
     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
@@ -785,7 +836,7 @@ int v3_is_vmx_capable() {
        
         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
 
-        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
+        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
             PrintDebug("VMX is locked -- enable in the BIOS\n");
             return 0;
         }
@@ -798,81 +849,23 @@ int v3_is_vmx_capable() {
     return 1;
 }
 
-static int has_vmx_nested_paging() {
-    return 0;
-}
+
+
 
 
 
 void v3_init_vmx_cpu(int cpu_id) {
     extern v3_cpu_arch_t v3_cpu_types[];
-    struct v3_msr tmp_msr;
-    uint64_t ret = 0;
 
-    v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
-#ifdef __V3_64BIT__
-    __asm__ __volatile__ (
-                         "movq %%cr4, %%rbx;"
-                         "orq  $0x00002000, %%rbx;"
-                         "movq %%rbx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%rbx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movq %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
-    }
-
-    __asm__ __volatile__ (
-                         "movq %%cr0, %%rbx; "
-                         "orq  $0x00000020,%%rbx; "
-                         "movq %%rbx, %%cr0;"
-                         :
-                         :
-                         : "%rbx"
-                         );
-#elif __V3_32BIT__
-    __asm__ __volatile__ (
-                         "movl %%cr4, %%ecx;"
-                         "orl  $0x00002000, %%ecx;"
-                         "movl %%ecx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%ecx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movl %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
+    if (cpu_id == 0) {
+       if (v3_init_vmx_hw(&hw_info) == -1) {
+           PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
+           return;
+       }
     }
 
-    __asm__ __volatile__ (
-                         "movl %%cr0, %%ecx; "
-                         "orl  $0x00000020,%%ecx; "
-                         "movl %%ecx, %%cr0;"
-                         :
-                         :
-                         : "%ecx"
-                         );
-
-#endif
 
-    //
-    // Should check and return Error here.... 
+    enable_vmx();
 
 
     // Setup VMXON Region
@@ -880,7 +873,7 @@ void v3_init_vmx_cpu(int cpu_id) {
 
     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
 
-    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
+    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
         PrintDebug("VMX Enabled\n");
     } else {
         PrintError("VMX initialization failure\n");
@@ -888,11 +881,14 @@ void v3_init_vmx_cpu(int cpu_id) {
     }
     
 
-    if (has_vmx_nested_paging() == 1) {
-        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
-    } else {
-        v3_cpu_types[cpu_id] = V3_VMX_CPU;
-    }
+    v3_cpu_types[cpu_id] = V3_VMX_CPU;
+
 
 }
 
+
+void v3_deinit_vmx_cpu(int cpu_id) {
+    extern v3_cpu_arch_t v3_cpu_types[];
+    v3_cpu_types[cpu_id] = V3_INVALID_CPU;
+    V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
+}