Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Lots of pedantic error checking in Palacios proper, especially for memory
[palacios.git] / palacios / src / palacios / svm.c
index bdb9862..facdb18 100644 (file)
@@ -1,4 +1,3 @@
-
 /* 
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
 #include <palacios/svm_msr.h>
 
 #include <palacios/vmm_rbtree.h>
+#include <palacios/vmm_barrier.h>
+#include <palacios/vmm_debug.h>
+
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>
+#endif
 
 #include <palacios/vmm_direct_paging.h>
 
@@ -81,6 +86,25 @@ static vmcb_t * Allocate_VMCB() {
 }
 
 
+static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
+{
+    int status;
+
+    // Call arch-independent handler
+    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
+       return status;
+    }
+
+    // SVM-specific code
+    {
+       // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
+       struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
+       hw_efer->svme = 1;
+    }
+
+    return 0;
+}
+
 
 static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
@@ -106,10 +130,9 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
     ctrl_area->instrs.HLT = 1;
 
-#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
-    ctrl_area->instrs.RDTSC = 1;
-    ctrl_area->svm_instrs.RDTSCP = 1;
-#endif
+    /* Set at VMM launch as needed */
+    ctrl_area->instrs.RDTSC = 0;
+    ctrl_area->svm_instrs.RDTSCP = 0;
 
     // guest_state->cr0 = 0x00000001;    // PE 
   
@@ -133,7 +156,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->instrs.NMI = 1;
     ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
     ctrl_area->instrs.INIT = 1;
-    ctrl_area->instrs.PAUSE = 1;
+    //    ctrl_area->instrs.PAUSE = 1;
     ctrl_area->instrs.shutdown_evts = 1;
 
 
@@ -221,7 +244,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
     v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
-               &v3_handle_efer_write, 
+               &v3_svm_handle_efer_write, 
                core);
 
     if (core->shdw_pg_mode == SHADOW_PAGING) {
@@ -288,6 +311,28 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
+
+
+    {
+#define INT_PENDING_AMD_MSR            0xc0010055
+
+       v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
+
+       v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
+
+
+       v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);
+
+       // Passthrough read operations are ok.
+       v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
+    }
 }
 
 
@@ -309,6 +354,8 @@ int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
        return -1;
     }
 
+    core->core_run_state = CORE_STOPPED;
+
     return 0;
 }
 
@@ -319,6 +366,37 @@ int v3_deinit_svm_vmcb(struct guest_info * core) {
 }
 
 
+#ifdef V3_CONFIG_CHECKPOINT
+int v3_svm_save_core(struct guest_info * core, void * ctx){
+
+    if (v3_chkpt_save_8(ctx, "cpl", &(core->cpl)) == -1) { 
+       PrintError("Could not save SVM cpl\n");
+       return -1;
+    }
+
+    if (v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) { 
+       PrintError("Could not save SVM vmcb\n");
+       return -1;
+    }
+
+    return 0;
+}
+
+int v3_svm_load_core(struct guest_info * core, void * ctx){
+    
+    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) { 
+       PrintError("Could not load SVM cpl\n");
+       return -1;
+    }
+
+    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
+       return -1;
+    }
+
+    return 0;
+}
+#endif
+
 static int update_irq_exit_state(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
 
@@ -452,6 +530,24 @@ static int update_irq_entry_state(struct guest_info * info) {
     return 0;
 }
 
+int 
+v3_svm_config_tsc_virtualization(struct guest_info * info) {
+    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
+
+    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
+       ctrl_area->instrs.RDTSC = 1;
+       ctrl_area->svm_instrs.RDTSCP = 1;
+    } else {
+       ctrl_area->instrs.RDTSC = 0;
+       ctrl_area->svm_instrs.RDTSCP = 0;
+       if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
+               ctrl_area->TSC_OFFSET = 0;
+       } else {
+               ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
+       }
+    }
+    return 0;
+}
 
 /* 
  * CAUTION and DANGER!!! 
@@ -465,21 +561,20 @@ int v3_svm_enter(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
+    uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-    // Perform any additional yielding needed for time adjustment
-    v3_adjust_time(info);
+    // Update timer devices after being in the VM before doing 
+    // IRQ updates, so that any interrupts they raise get seen 
+    // immediately.
+    v3_advance_time(info, NULL);
+    v3_update_timers(info);
 
     // disable global interrupts for vm state transition
     v3_clgi();
 
-    // Update timer devices after being in the VM, with interupts
-    // disabled, but before doing IRQ updates, so that any interrupts they 
-    //raise get seen immediately.
-    v3_update_timers(info);
-
     // Synchronize the guest state to the VMCB
     guest_state->cr0 = info->ctrl_regs.cr0;
     guest_state->cr2 = info->ctrl_regs.cr2;
@@ -491,6 +586,12 @@ int v3_svm_enter(struct guest_info * info) {
     guest_state->rflags = info->ctrl_regs.rflags;
     guest_state->efer = info->ctrl_regs.efer;
     
+    /* Synchronize MSRs */
+    guest_state->star = info->msrs.star;
+    guest_state->lstar = info->msrs.lstar;
+    guest_state->sfmask = info->msrs.sfmask;
+    guest_state->KernelGsBase = info->msrs.kern_gs_base;
+
     guest_state->cpl = info->cpl;
 
     v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
@@ -524,19 +625,28 @@ int v3_svm_enter(struct guest_info * info) {
     }
 #endif
 
-    v3_time_enter_vm(info);
-    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
+    v3_svm_config_tsc_virtualization(info);
 
     //V3_Print("Calling v3_svm_launch\n");
+    {  
+       uint64_t entry_tsc = 0;
+       uint64_t exit_tsc = 0;
+       
+       rdtscll(entry_tsc);
+
+       v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
+
+       rdtscll(exit_tsc);
+
+       guest_cycles = exit_tsc - entry_tsc;
+    }
 
-    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
 
     //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
 
-    // Immediate exit from VM time bookkeeping
-    v3_time_exit_vm(info);
+    v3_advance_time(info, &guest_cycles);
 
     info->num_exits++;
 
@@ -557,6 +667,12 @@ int v3_svm_enter(struct guest_info * info) {
     info->ctrl_regs.rflags = guest_state->rflags;
     info->ctrl_regs.efer = guest_state->efer;
     
+    /* Synchronize MSRs */
+    info->msrs.star =  guest_state->star;
+    info->msrs.lstar = guest_state->lstar;
+    info->msrs.sfmask = guest_state->sfmask;
+    info->msrs.kern_gs_base = guest_state->KernelGsBase;
+
     v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
     info->cpu_mode = v3_get_vm_cpu_mode(info);
     info->mem_mode = v3_get_vm_mem_mode(info);
@@ -581,6 +697,11 @@ int v3_svm_enter(struct guest_info * info) {
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
+    // This update timers is for time-dependent handlers
+    // if we're slaved to host time
+    v3_advance_time(info, NULL);
+    v3_update_timers(info);
+
     {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
@@ -591,6 +712,11 @@ int v3_svm_enter(struct guest_info * info) {
        }
     }
 
+    if (info->timeouts.timeout_active) {
+       /* Check to see if any timeouts have expired */
+       v3_handle_timeouts(info, guest_cycles);
+    }
+
 
     return 0;
 }
@@ -604,16 +730,24 @@ int v3_start_svm_guest(struct guest_info * info) {
 
     if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
-       info->vm_info->run_state = VM_RUNNING;
     } else  { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);
 
        while (info->core_run_state == CORE_STOPPED) {
+           
+           if (info->vm_info->run_state == VM_STOPPED) {
+               // The VM was stopped before this core was initialized. 
+               return 0;
+           }
+
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }
 
        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
+
+       // We'll be paranoid about race conditions here
+       v3_wait_at_barrier(info);
     } 
 
     PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
@@ -673,6 +807,8 @@ int v3_start_svm_guest(struct guest_info * info) {
            break;
        }
 
+       v3_wait_at_barrier(info);
+
 
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
@@ -802,6 +938,11 @@ void v3_init_svm_cpu(int cpu_id) {
     // Setup the host state save area
     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
 
+    if (!host_vmcbs[cpu_id]) {
+       PrintError("Failed to allocate VMCB\n");
+       return;
+    }
+
     /* 64-BIT-ISSUE */
     //  msr.e_reg.high = 0;
     //msr.e_reg.low = (uint_t)host_vmcb;
@@ -1041,6 +1182,11 @@ void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
 
   ctrl_area->instrs.IOIO_PROT = 1;
   ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
+
+  if (!ctrl_area->IOPM_BASE_PA) { 
+      PrintError("Cannot allocate IO bitmap\n");
+      return;
+  }
   
   {
     reg_ex_t tmp_reg;