Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Explictly tags in saves for PIT, APIC, IOAPIC, SVM, and CORE
[palacios.git] / palacios / src / palacios / svm.c
index 95d9e6c..99d12d3 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 
+
 #include <palacios/svm.h>
 #include <palacios/vmm.h>
 
@@ -36,6 +37,9 @@
 
 #include <palacios/vmm_rbtree.h>
 #include <palacios/vmm_barrier.h>
+#include <palacios/vmm_debug.h>
+
+
 
 #ifdef V3_CONFIG_CHECKPOINT
 #include <palacios/vmm_checkpoint.h>
@@ -129,10 +133,9 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
     ctrl_area->instrs.HLT = 1;
 
-#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
-    ctrl_area->instrs.RDTSC = 1;
-    ctrl_area->svm_instrs.RDTSCP = 1;
-#endif
+    /* Set at VMM launch as needed */
+    ctrl_area->instrs.RDTSC = 0;
+    ctrl_area->svm_instrs.RDTSCP = 0;
 
     // guest_state->cr0 = 0x00000001;    // PE 
   
@@ -156,7 +159,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->instrs.NMI = 1;
     ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
     ctrl_area->instrs.INIT = 1;
-    ctrl_area->instrs.PAUSE = 1;
+    //    ctrl_area->instrs.PAUSE = 1;
     ctrl_area->instrs.shutdown_evts = 1;
 
 
@@ -240,7 +243,19 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     PrintDebug("Exiting on interrupts\n");
     ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
     ctrl_area->instrs.INTR = 1;
+    // The above also assures the TPR changes (CR8) are only virtual
+
 
+    // However, we need to see TPR writes since they will
+    // affect the virtual apic
+    // we reflect out cr8 to ctrl_regs->apic_tpr
+    ctrl_area->cr_reads.cr8 = 1;
+    ctrl_area->cr_writes.cr8 = 1;
+    // We will do all TPR comparisons in the virtual apic
+    // We also do not want the V_TPR to be able to mask the PIC
+    ctrl_area->guest_ctrl.V_IGN_TPR = 1;
+
+    
 
     v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
@@ -276,7 +291,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
        ctrl_area->cr_writes.cr3 = 1;
 
 
-
        ctrl_area->instrs.INVLPG = 1;
 
        ctrl_area->exceptions.pf = 1;
@@ -284,7 +298,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
        guest_state->g_pat = 0x7040600070406ULL;
 
 
-
     } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
@@ -354,6 +367,8 @@ int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
        return -1;
     }
 
+    core->core_run_state = CORE_STOPPED;
+
     return 0;
 }
 
@@ -367,21 +382,62 @@ int v3_deinit_svm_vmcb(struct guest_info * core) {
 #ifdef V3_CONFIG_CHECKPOINT
 int v3_svm_save_core(struct guest_info * core, void * ctx){
 
-    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
-    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);
+  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 
+
+  // Special case saves of data we need immediate access to
+  // in some cases
+  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
+  V3_CHKPT_SAVE(ctx,"STAR", guest_area->star, failout); 
+  V3_CHKPT_SAVE(ctx,"CSTAR", guest_area->cstar, failout); 
+  V3_CHKPT_SAVE(ctx,"LSTAR", guest_area->lstar, failout); 
+  V3_CHKPT_SAVE(ctx,"SFMASK", guest_area->sfmask, failout); 
+  V3_CHKPT_SAVE(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
+  V3_CHKPT_SAVE(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
+  V3_CHKPT_SAVE(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
+  V3_CHKPT_SAVE(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
+  
+// and then we save the whole enchilada
+  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
+    PrintError("Could not save SVM vmcb\n");
+    goto failout;
+  }
+  
+  return 0;
+
+ failout:
+  PrintError("Failed to save SVM state for core\n");
+  return -1;
 
-    return 0;
 }
 
 int v3_svm_load_core(struct guest_info * core, void * ctx){
     
-    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));
 
-    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
-       return -1;
-    }
+  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 
+
+  // Reload what we special cased, which we will overwrite in a minute
+  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
+  V3_CHKPT_LOAD(ctx,"STAR", guest_area->star, failout); 
+  V3_CHKPT_LOAD(ctx,"CSTAR", guest_area->cstar, failout); 
+  V3_CHKPT_LOAD(ctx,"LSTAR", guest_area->lstar, failout); 
+  V3_CHKPT_LOAD(ctx,"SFMASK", guest_area->sfmask, failout); 
+  V3_CHKPT_LOAD(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
+  V3_CHKPT_LOAD(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
+  V3_CHKPT_LOAD(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
+  V3_CHKPT_LOAD(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
+  
+  // and then we load the whole enchilada
+  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
+    PrintError("Could not load SVM vmcb\n");
+    goto failout;
+  }
+  
+  return 0;
+
+ failout:
+  PrintError("Failed to save SVM state for core\n");
+  return -1;
 
-    return 0;
 }
 #endif
 
@@ -462,8 +518,11 @@ static int update_irq_entry_state(struct guest_info * info) {
 #endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
+
+       // We ignore the virtual TPR on this injection
+       // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
-       guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;
+       guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;
 
     } else {
        switch (v3_intr_pending(info)) {
@@ -472,8 +531,11 @@ static int update_irq_entry_state(struct guest_info * info) {
 
                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
+
+               // We ignore the virtual TPR on this injection
+               // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
-               guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;
+               guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;
 
 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
@@ -518,6 +580,26 @@ static int update_irq_entry_state(struct guest_info * info) {
     return 0;
 }
 
+int 
+v3_svm_config_tsc_virtualization(struct guest_info * info) {
+    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
+
+
+    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
+       ctrl_area->instrs.RDTSC = 1;
+       ctrl_area->svm_instrs.RDTSCP = 1;
+    } else {
+       ctrl_area->instrs.RDTSC = 0;
+       ctrl_area->svm_instrs.RDTSCP = 0;
+
+       if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
+               ctrl_area->TSC_OFFSET = 0;
+       } else {
+               ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
+       }
+    }
+    return 0;
+}
 
 /* 
  * CAUTION and DANGER!!! 
@@ -531,22 +613,20 @@ int v3_svm_enter(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
-    sint64_t tsc_offset;
+    uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info);
+    v3_yield_cond(info,-1);
 
-    // Perform any additional yielding needed for time adjustment
-    v3_adjust_time(info);
+    // Update timer devices after being in the VM before doing 
+    // IRQ updates, so that any interrupts they raise get seen 
+    // immediately.
+    v3_advance_time(info, NULL);
+    v3_update_timers(info);
 
     // disable global interrupts for vm state transition
     v3_clgi();
 
-    // Update timer devices after being in the VM, with interupts
-    // disabled, but before doing IRQ updates, so that any interrupts they 
-    //raise get seen immediately.
-    v3_update_timers(info);
-
     // Synchronize the guest state to the VMCB
     guest_state->cr0 = info->ctrl_regs.cr0;
     guest_state->cr2 = info->ctrl_regs.cr2;
@@ -554,10 +634,26 @@ int v3_svm_enter(struct guest_info * info) {
     guest_state->cr4 = info->ctrl_regs.cr4;
     guest_state->dr6 = info->dbg_regs.dr6;
     guest_state->dr7 = info->dbg_regs.dr7;
-    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
+
+    // CR8 is now updated by read/writes and it contains the APIC TPR
+    // the V_TPR should be just the class part of that.
+    // This update is here just for completeness.  We currently
+    // are ignoring V_TPR on all injections and doing the priority logivc
+    // in the APIC.
+    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;
+
+    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
+    // 
+    
     guest_state->rflags = info->ctrl_regs.rflags;
     guest_state->efer = info->ctrl_regs.efer;
     
+    /* Synchronize MSRs */
+    guest_state->star = info->msrs.star;
+    guest_state->lstar = info->msrs.lstar;
+    guest_state->sfmask = info->msrs.sfmask;
+    guest_state->KernelGsBase = info->msrs.kern_gs_base;
+
     guest_state->cpl = info->cpl;
 
     v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
@@ -591,21 +687,28 @@ int v3_svm_enter(struct guest_info * info) {
     }
 #endif
 
-    v3_time_enter_vm(info);
-    tsc_offset = v3_tsc_host_offset(&info->time_state);
-    guest_ctrl->TSC_OFFSET = tsc_offset;
-
+    v3_svm_config_tsc_virtualization(info);
 
     //V3_Print("Calling v3_svm_launch\n");
+    {  
+       uint64_t entry_tsc = 0;
+       uint64_t exit_tsc = 0;
+       
+       rdtscll(entry_tsc);
+
+       v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
+
+       rdtscll(exit_tsc);
+
+       guest_cycles = exit_tsc - entry_tsc;
+    }
 
-    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
 
     //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
 
-    // Immediate exit from VM time bookkeeping
-    v3_time_exit_vm(info);
+    v3_advance_time(info, &guest_cycles);
 
     info->num_exits++;
 
@@ -622,10 +725,20 @@ int v3_svm_enter(struct guest_info * info) {
     info->ctrl_regs.cr4 = guest_state->cr4;
     info->dbg_regs.dr6 = guest_state->dr6;
     info->dbg_regs.dr7 = guest_state->dr7;
-    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
+    //
+    // We do not track this anymore
+    // V_TPR is ignored and we do the logic in the APIC
+    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
+    //
     info->ctrl_regs.rflags = guest_state->rflags;
     info->ctrl_regs.efer = guest_state->efer;
     
+    /* Synchronize MSRs */
+    info->msrs.star =  guest_state->star;
+    info->msrs.lstar = guest_state->lstar;
+    info->msrs.sfmask = guest_state->sfmask;
+    info->msrs.kern_gs_base = guest_state->KernelGsBase;
+
     v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
     info->cpu_mode = v3_get_vm_cpu_mode(info);
     info->mem_mode = v3_get_vm_mem_mode(info);
@@ -648,7 +761,12 @@ int v3_svm_enter(struct guest_info * info) {
     v3_stgi();
  
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info);
+    v3_yield_cond(info,-1);
+
+    // This update timers is for time-dependent handlers
+    // if we're slaved to host time
+    v3_advance_time(info, NULL);
+    v3_update_timers(info);
 
     {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
@@ -660,6 +778,11 @@ int v3_svm_enter(struct guest_info * info) {
        }
     }
 
+    if (info->timeouts.timeout_active) {
+       /* Check to see if any timeouts have expired */
+       v3_handle_timeouts(info, guest_cycles);
+    }
+
 
     return 0;
 }
@@ -673,7 +796,6 @@ int v3_start_svm_guest(struct guest_info * info) {
 
     if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
-       info->vm_info->run_state = VM_RUNNING;
     } else  { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);
 
@@ -684,7 +806,7 @@ int v3_start_svm_guest(struct guest_info * info) {
                return 0;
            }
 
-           v3_yield(info);
+           v3_yield(info,-1);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }
 
@@ -882,6 +1004,11 @@ void v3_init_svm_cpu(int cpu_id) {
     // Setup the host state save area
     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
 
+    if (!host_vmcbs[cpu_id]) {
+       PrintError("Failed to allocate VMCB\n");
+       return;
+    }
+
     /* 64-BIT-ISSUE */
     //  msr.e_reg.high = 0;
     //msr.e_reg.low = (uint_t)host_vmcb;
@@ -1121,6 +1248,11 @@ void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
 
   ctrl_area->instrs.IOIO_PROT = 1;
   ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
+
+  if (!ctrl_area->IOPM_BASE_PA) { 
+      PrintError("Cannot allocate IO bitmap\n");
+      return;
+  }
   
   {
     reg_ex_t tmp_reg;