Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


More changed files
[palacios.git] / palacios / src / palacios / vmx.c
index 90f8e52..a30cb4e 100644 (file)
@@ -33,6 +33,7 @@
 #include <palacios/vmx_msr.h>
 #include <palacios/vmm_decoder.h>
 #include <palacios/vmm_barrier.h>
+#include <palacios/vmm_timeout.h>
 
 #ifdef V3_CONFIG_CHECKPOINT
 #include <palacios/vmm_checkpoint.h>
@@ -68,6 +69,9 @@ static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
         return 1;
     }
 
+
+    
+
     return 0;
 }
 
@@ -100,12 +104,50 @@ static addr_t allocate_vmcs() {
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
 
+/*
+
+static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
+    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
+    V3_Print("\n\nEFER READ\n");
+    
+    v3_print_guest_state(core);
+
+    src->value = efer->value;
+    return 0;
+}
+
+static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
+    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
+    V3_Print("\n\nEFER WRITE\n");
+    
+    v3_print_guest_state(core);
 
+    efer->value = src.value;
+
+    {
+       struct vmx_data * vmx_state = core->vmm_data;
+
+       V3_Print("Trapping page faults and GPFs\n");
+       vmx_state->excp_bmap.pf = 1;
+       vmx_state->excp_bmap.gp = 1;
+       
+        check_vmcs_write(VMCS_EXCP_BITMAP, vmx_state->excp_bmap.value);
+    }
+
+    return 0;
+}
+*/
 
 
 static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
     int vmx_ret = 0;
 
+    /* Get Available features */
+    struct vmx_pin_ctrls avail_pin_ctrls;
+    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
+    /* ** */
+
+
     // disable global interrupts for vm state initialization
     v3_disable_ints();
 
@@ -145,6 +187,13 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     vmx_state->pin_ctrls.ext_int_exit = 1;
 
 
+    /* We enable the preemption timer by default to measure accurate guest time */
+    if (avail_pin_ctrls.active_preempt_timer) {
+       V3_Print("VMX Preemption Timer is available\n");
+       vmx_state->pin_ctrls.active_preempt_timer = 1;
+       vmx_state->exit_ctrls.save_preempt_timer = 1;
+    }
+
     vmx_state->pri_proc_ctrls.hlt_exit = 1;
 
 
@@ -171,11 +220,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     vmx_state->exit_ctrls.host_64_on = 1;
 #endif
 
-    // Hook all accesses to EFER register
-    v3_hook_msr(core->vm_info, EFER_MSR, 
-               &v3_handle_efer_read,
-               &v3_handle_efer_write, 
-               core);
+
 
     // Restore host's EFER register on each VM EXIT
     vmx_state->exit_ctrls.ld_efer = 1;
@@ -184,9 +229,15 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     vmx_state->exit_ctrls.save_efer = 1;
     vmx_state->entry_ctrls.ld_efer  = 1;
 
-    // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
-    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+    vmx_state->exit_ctrls.save_pat = 1;
+    vmx_state->exit_ctrls.ld_pat = 1;
+    vmx_state->entry_ctrls.ld_pat = 1;
+
+    /* Temporary GPF trap */
+    //    vmx_state->excp_bmap.gp = 1;
 
+    // Setup Guests initial PAT field
+    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
 
     /* Setup paging */
     if (core->shdw_pg_mode == SHADOW_PAGING) {
@@ -202,6 +253,10 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
 #define CR0_WP 0x00010000 // To ensure mem hooks work
         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));
 
+
+       // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
+       vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+
         core->ctrl_regs.cr3 = core->direct_map_pt;
 
         // vmx_state->pinbased_ctrls |= NMI_EXIT;
@@ -218,6 +273,12 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);
 
+       // Hook all accesses to EFER register
+       v3_hook_msr(core->vm_info, EFER_MSR, 
+                   &v3_handle_efer_read,
+                   &v3_handle_efer_write, 
+                   core);
+
     } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_CPU)) {
 
@@ -228,6 +289,9 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
 
         // vmx_state->pinbased_ctrls |= NMI_EXIT;
 
+       // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
+       vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+       
         /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;
@@ -251,6 +315,9 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
            return -1;
        }
 
+       // Hook all accesses to EFER register
+       v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
+
     } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
@@ -261,8 +328,9 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
-       core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
-
+       core->ctrl_regs.cr0 = 0x00000030; 
+       core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag
+       
 
        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
@@ -307,7 +375,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
-       core->segments.ldtr.type = 2;
+       core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;
 
        core->segments.tr.selector = 0x0000;
@@ -332,11 +400,18 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        vmx_state->pri_proc_ctrls.invlpg_exit = 0;
 
 
+       // Cause VM_EXIT whenever the CR4.VMXE bit is set
+       vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
+
+
        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }
 
+       // Hook all accesses to EFER register
+       //v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
+       v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
     } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
@@ -406,6 +481,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);
 
+       msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);
 
        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
@@ -726,6 +802,9 @@ static int update_irq_entry_state(struct guest_info * info) {
 
 
 static struct vmx_exit_info exit_log[10];
+static uint64_t rip_log[10];
+
+
 
 static void print_exit_log(struct guest_info * info) {
     int cnt = info->num_exits % 10;
@@ -742,6 +821,9 @@ static void print_exit_log(struct guest_info * info) {
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
+       V3_Print("\tguest_linear_addr= %p\n", (void *)(addr_t)tmp->guest_linear_addr);
+       V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);
+
 
        cnt--;
 
@@ -753,6 +835,8 @@ static void print_exit_log(struct guest_info * info) {
 
 }
 
+
+
 /* 
  * CAUTION and DANGER!!! 
  * 
@@ -763,23 +847,23 @@ static void print_exit_log(struct guest_info * info) {
  */
 int v3_vmx_enter(struct guest_info * info) {
     int ret = 0;
+    sint64_t tsc_offset;
     uint32_t tsc_offset_low, tsc_offset_high;
     struct vmx_exit_info exit_info;
     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
+    uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-    // Perform any additional yielding needed for time adjustment
-    v3_adjust_time(info);
-
     // disable global interrupts for vm state transition
     v3_disable_ints();
 
     // Update timer devices late after being in the VM so that as much 
-    // of hte time in the VM is accounted for as possible. Also do it before
+    // of the time in the VM is accounted for as possible. Also do it before
     // updating IRQ entry state so that any interrupts the timers raise get 
     // handled on the next VM entry. Must be done with interrupts disabled.
+    v3_advance_time(info);
     v3_update_timers(info);
 
     if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
@@ -805,30 +889,56 @@ int v3_vmx_enter(struct guest_info * info) {
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
     }
 
+
     // Perform last-minute time bookkeeping prior to entering the VM
     v3_time_enter_vm(info);
+    
+    tsc_offset = v3_tsc_host_offset(&info->time_state);
+    tsc_offset_high = (uint32_t)(( tsc_offset >> 32) & 0xffffffff);
+    tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);
 
-    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
-    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
+    
+
     if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
         PrintError("Could not write host state\n");
         return -1;
     }
+    
+    if (vmx_info->pin_ctrls.active_preempt_timer) {
+       /* Preemption timer is active */
+       uint32_t preempt_window = 0xffffffff;
 
-
-    if (vmx_info->state == VMX_UNLAUNCHED) {
-       vmx_info->state = VMX_LAUNCHED;
-       ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
-    } else {
-       V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
-       ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
+       if (info->timeouts.timeout_active) {
+           preempt_window = info->timeouts.next_timeout;
+       }
+       
+       check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
     }
-    
+   
 
+    {  
+       uint64_t entry_tsc = 0;
+       uint64_t exit_tsc = 0;
+
+       if (vmx_info->state == VMX_UNLAUNCHED) {
+           vmx_info->state = VMX_LAUNCHED;
+           rdtscll(entry_tsc);
+           ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
+           rdtscll(exit_tsc);
+
+       } else {
+           V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
+           rdtscll(entry_tsc);
+           ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
+           rdtscll(exit_tsc);
+       }
+
+       guest_cycles = exit_tsc - entry_tsc;    
+    }
 
     //  PrintDebug("VMX Exit: ret=%d\n", ret);
 
@@ -843,11 +953,23 @@ int v3_vmx_enter(struct guest_info * info) {
     }
 
 
+    info->num_exits++;
+
+    /* If we have the preemption time, then use it to get more accurate guest time */
+    if (vmx_info->pin_ctrls.active_preempt_timer) {
+       uint32_t cycles_left = 0;
+       check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));
+
+       if (info->timeouts.timeout_active) {
+           guest_cycles = info->timeouts.next_timeout - cycles_left;
+       } else {
+           guest_cycles = 0xffffffff - cycles_left;
+       }
+    }
 
     // Immediate exit from VM time bookkeeping
-    v3_time_exit_vm(info);
+    v3_time_exit_vm(info, &guest_cycles);
 
-    info->num_exits++;
 
     /* Update guest state */
     v3_vmx_save_vmcs(info);
@@ -858,6 +980,7 @@ int v3_vmx_enter(struct guest_info * info) {
     info->cpu_mode = v3_get_vm_cpu_mode(info);
 
 
+
     check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
     check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
     check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
@@ -873,6 +996,7 @@ int v3_vmx_enter(struct guest_info * info) {
     //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
 
     exit_log[info->num_exits % 10] = exit_info;
+    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));
 
 #ifdef V3_CONFIG_SYMCALL
     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
@@ -904,6 +1028,11 @@ int v3_vmx_enter(struct guest_info * info) {
        return -1;
     }
 
+    if (info->timeouts.timeout_active) {
+       /* Check to see if any timeouts have expired */
+       v3_handle_timeouts(info, guest_cycles);
+    }
+
     return 0;
 }
 
@@ -1060,8 +1189,9 @@ int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
 
 void v3_init_vmx_cpu(int cpu_id) {
     addr_t vmx_on_region = 0;
+    extern v3_cpu_arch_t v3_mach_type;
 
-    if (cpu_id == 0) {
+    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
@@ -1100,6 +1230,7 @@ void v3_init_vmx_cpu(int cpu_id) {
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
     }
+    
 }