Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches can be checked out the same way, substituting the branch name for devel.


enabled stopping a VM before the secondary cores have been initialized
[palacios.git] / palacios / src / palacios / svm.c
index a2cbbc1..f61fadf 100644 (file)
@@ -1,4 +1,3 @@
-
 /* 
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
 #include <palacios/svm_msr.h>
 
 #include <palacios/vmm_rbtree.h>
+#include <palacios/vmm_barrier.h>
+
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>
+#endif
 
 #include <palacios/vmm_direct_paging.h>
 
@@ -81,6 +85,25 @@ static vmcb_t * Allocate_VMCB() {
 }
 
 
+/*
+ * EFER write handler for SVM.
+ *
+ * Forwards the write to the architecture-independent handler
+ * (v3_handle_efer_write). If that handler returns non-zero, its status is
+ * returned unchanged. Otherwise the svme bit of core->ctrl_regs.efer is set
+ * and 0 is returned.
+ */
+static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
+{
+    struct efer_64 * hw_efer = NULL;
+    int ret = v3_handle_efer_write(core, msr, src, priv_data);
+
+    if (ret != 0) {
+       // Generic handling failed; hand its status back untouched
+       return ret;
+    }
+
+    // SVM-specific fixup: keep the hardware visible EFER.SVME bit
+    // (SVM Enable) set, whatever value was just written
+    hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
+    hw_efer->svme = 1;
+
+    return 0;
+}
+
 
 static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
@@ -136,11 +159,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->instrs.PAUSE = 1;
     ctrl_area->instrs.shutdown_evts = 1;
 
-    /* KCH: intercept SW Interrupts (INT instr) */
-#ifdef V3_CONFIG_SW_INTERRUPTS
-    ctrl_area->instrs.INTn = 1;
-#endif
-
 
     /* DEBUG FOR RETURN CODE */
     ctrl_area->exit_code = 1;
@@ -226,40 +244,9 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
     v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
-               &v3_handle_efer_write, 
+               &v3_svm_handle_efer_write, 
                core);
 
-#ifdef V3_CONFIG_HIJACK_SYSCALL_MSR
-    /* KCH: we're not hooking these to TRAP them,
-            instead, we're going to catch the target EIP.
-            Hopefully this EIP is the entry point in the ELF located in the 
-            vsyscall page. We can inject checks into the code segment such that
-            we don't have to exit on uninteresting system calls. This should
-            give us much better performance than INT 80, and should even obviate
-            the need to deal with software interrupts at all */
-    v3_hook_msr(core->vm_info, STAR_MSR,
-        &v3_handle_star_read,
-        &v3_handle_star_write,
-        core);
-    v3_hook_msr(core->vm_info, LSTAR_MSR,
-        &v3_handle_lstar_read,
-        &v3_handle_lstar_write,
-        core);
-    v3_hook_msr(core->vm_info, CSTAR_MSR,
-        &v3_handle_cstar_read,
-        &v3_handle_cstar_write,
-        core);
-    
-    /* KCH: this probably isn't necessary, as
-        SYSENTER is only used in legacy mode. In fact,
-        in long mode it results in an illegal instruction
-        exception */
-    v3_hook_msr(core->vm_info, IA32_SYSENTER_EIP_MSR,
-        &v3_handle_seeip_read,
-        &v3_handle_seeip_write,
-        core);
-#endif
-
     if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
@@ -324,6 +311,19 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
+
+
+    {
+       v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
+
+       v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
+    }
 }
 
 
@@ -355,6 +355,27 @@ int v3_deinit_svm_vmcb(struct guest_info * core) {
 }
 
 
+#ifdef V3_CONFIG_CHECKPOINT
+/*
+ * Write this core's SVM-specific state into a checkpoint context.
+ *
+ * Stores core->cpl under the "cpl" tag, followed by PAGE_SIZE bytes taken
+ * from core->vmm_data under the "vmcb_data" tag. Always returns 0.
+ *
+ * NOTE(review): the results of the two save calls are not examined, so a
+ * failed save is not reported to the caller -- confirm this is intended.
+ */
+int v3_svm_save_core(struct guest_info * core, void * ctx)
+{
+    void * vmcb_page = core->vmm_data;
+
+    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
+    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, vmcb_page);
+
+    return 0;
+}
+
+/*
+ * Restore this core's SVM-specific state from a checkpoint context.
+ *
+ * Reads the "cpl" tag into core->cpl, then PAGE_SIZE bytes tagged
+ * "vmcb_data" into core->vmm_data.
+ *
+ * Returns 0 on success, or -1 as soon as either load reports failure.
+ */
+int v3_svm_load_core(struct guest_info * core, void * ctx)
+{
+    // A failed cpl load must not be reported as a successful restore;
+    // check it the same way as the VMCB load below.
+    // NOTE(review): assumes v3_chkpt_load_8 follows the same -1-on-error
+    // convention as v3_chkpt_load -- confirm against vmm_checkpoint.h
+    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) {
+       return -1;
+    }
+
+    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
+       return -1;
+    }
+
+    return 0;
+}
+#endif
+
 static int update_irq_exit_state(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
 
@@ -459,23 +480,21 @@ static int update_irq_entry_state(struct guest_info * info) {
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
-           case V3_SOFTWARE_INTR: {
-#ifdef CONFIG_DEBUG_INTERRUPTS
-            PrintDebug("Caught an injected software interrupt\n");
-            PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
+           case V3_SOFTWARE_INTR:
+               guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
+
+#ifdef V3_CONFIG_DEBUG_INTERRUPTS
+               PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
+                          SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
 #endif
-            guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
-            guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
-            guest_ctrl->EVENTINJ.valid = 1;
+               guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
+               guest_ctrl->EVENTINJ.valid = 1;
             
-            /* reset the software interrupt state. 
-                we can do this because we know only one
-                sw int can be posted at a time on a given 
-                core, unlike irqs */
-            info->intr_core_state.swintr_posted = 0;
-            info->intr_core_state.swintr_vector = 0;
-            break;
-        }
+               /* reset swintr state */
+               info->intr_core_state.swintr_posted = 0;
+               info->intr_core_state.swintr_vector = 0;
+               
+               break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;
@@ -503,6 +522,7 @@ int v3_svm_enter(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
+    sint64_t tsc_offset;
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
@@ -513,7 +533,9 @@ int v3_svm_enter(struct guest_info * info) {
     // disable global interrupts for vm state transition
     v3_clgi();
 
-    // Update timer devices prior to entering VM.
+    // Update timer devices after being in the VM, with interrupts
+    // disabled, but before doing IRQ updates, so that any interrupts they
+    // raise get seen immediately.
     v3_update_timers(info);
 
     // Synchronize the guest state to the VMCB
@@ -561,7 +583,9 @@ int v3_svm_enter(struct guest_info * info) {
 #endif
 
     v3_time_enter_vm(info);
-    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
+    tsc_offset = v3_tsc_host_offset(&info->time_state);
+    guest_ctrl->TSC_OFFSET = tsc_offset;
+
 
     //V3_Print("Calling v3_svm_launch\n");
 
@@ -598,13 +622,11 @@ int v3_svm_enter(struct guest_info * info) {
     info->mem_mode = v3_get_vm_mem_mode(info);
     /* ** */
 
-
     // save exit info here
     exit_code = guest_ctrl->exit_code;
     exit_info1 = guest_ctrl->exit_info1;
     exit_info2 = guest_ctrl->exit_info2;
 
-
 #ifdef V3_CONFIG_SYMCALL
     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
@@ -613,20 +635,20 @@ int v3_svm_enter(struct guest_info * info) {
     update_irq_exit_state(info);
 #endif
 
-
     // reenable global interrupts after vm exit
     v3_stgi();
-
  
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-
-
-    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
-       PrintError("Error in SVM exit handler\n");
-       PrintError("  last exit was %d\n", v3_last_exit);
-       return -1;
+    {
+       int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
+       
+       if (ret != 0) {
+           PrintError("Error in SVM exit handler (ret=%d)\n", ret);
+           PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
+           return -1;
+       }
     }
 
 
@@ -647,11 +669,20 @@ int v3_start_svm_guest(struct guest_info * info) {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);
 
        while (info->core_run_state == CORE_STOPPED) {
+           
+           if (info->vm_info->run_state == VM_STOPPED) {
+               // The VM was stopped before this core was initialized. 
+               return 0;
+           }
+
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }
 
        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
+
+       // We'll be paranoid about race conditions here
+       v3_wait_at_barrier(info);
     } 
 
     PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
@@ -711,6 +742,8 @@ int v3_start_svm_guest(struct guest_info * info) {
            break;
        }
 
+       v3_wait_at_barrier(info);
+
 
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
@@ -810,11 +843,11 @@ int v3_is_svm_capable() {
 
 static int has_svm_nested_paging() {
     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
-
+    
     v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
-
+    
     //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
-
+    
     if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
@@ -822,8 +855,8 @@ static int has_svm_nested_paging() {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
     }
-}
-
+ }
 
 
 void v3_init_svm_cpu(int cpu_id) {