Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


moved VM runstate settings to avoid race condition
[palacios.git] / palacios / src / palacios / vmx.c
index f1d0939..5746e93 100644 (file)
 #include <palacios/vmm_direct_paging.h>
 #include <palacios/vmx_io.h>
 #include <palacios/vmx_msr.h>
+#include <palacios/vmm_decoder.h>
+#include <palacios/vmm_barrier.h>
+
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>
+#endif
 
 #include <palacios/vmx_ept.h>
 #include <palacios/vmx_assist.h>
@@ -130,53 +136,6 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     /******* Setup Host State **********/
 
     /* Cache GDTR, IDTR, and TR in host struct */
-    addr_t gdtr_base;
-    struct {
-        uint16_t selector;
-        addr_t   base;
-    } __attribute__((packed)) tmp_seg;
-    
-
-    __asm__ __volatile__(
-                        "sgdt (%0);"
-                        :
-                        : "q"(&tmp_seg)
-                        : "memory"
-                        );
-    gdtr_base = tmp_seg.base;
-    vmx_state->host_state.gdtr.base = gdtr_base;
-
-    __asm__ __volatile__(
-                        "sidt (%0);"
-                        :
-                        : "q"(&tmp_seg)
-                        : "memory"
-                        );
-    vmx_state->host_state.idtr.base = tmp_seg.base;
-
-    __asm__ __volatile__(
-                        "str (%0);"
-                        :
-                        : "q"(&tmp_seg)
-                        : "memory"
-                        );
-    vmx_state->host_state.tr.selector = tmp_seg.selector;
-
-    /* The GDTR *index* is bits 3-15 of the selector. */
-    struct tss_descriptor * desc = NULL;
-    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
-
-    tmp_seg.base = ((desc->base1) |
-                   (desc->base2 << 16) |
-                   (desc->base3 << 24) |
-#ifdef __V3_64BIT__
-                   ((uint64_t)desc->base4 << 32)
-#else 
-                   (0)
-#endif
-                   );
-
-    vmx_state->host_state.tr.base = tmp_seg.base;
 
 
     /********** Setup VMX Control Fields ***********/
@@ -207,10 +166,6 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
 
 
 
-    
-
-
-
 #ifdef __V3_64BIT__
     // Ensure host runs in 64-bit mode at each VM EXIT
     vmx_state->exit_ctrls.host_64_on = 1;
@@ -394,13 +349,6 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     
     // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
     {
-#define IA32_STAR 0xc0000081
-#define IA32_LSTAR 0xc0000082
-#define IA32_FMASK 0xc0000084
-#define IA32_KERN_GS_BASE 0xc0000102
-
-#define IA32_CSTAR 0xc0000083 // Compatibility mode STAR (ignored for now... hopefully its not that important...)
-
        int msr_ret = 0;
 
        struct vmcs_msr_entry * exit_store_msrs = NULL;
@@ -432,24 +380,42 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));
 
 
-       exit_store_msrs[0].index = IA32_STAR;
-       exit_store_msrs[1].index = IA32_LSTAR;
-       exit_store_msrs[2].index = IA32_FMASK;
-       exit_store_msrs[3].index = IA32_KERN_GS_BASE;
+       exit_store_msrs[0].index = IA32_STAR_MSR;
+       exit_store_msrs[1].index = IA32_LSTAR_MSR;
+       exit_store_msrs[2].index = IA32_FMASK_MSR;
+       exit_store_msrs[3].index = IA32_KERN_GS_BASE_MSR;
        
        memcpy(exit_store_msrs, exit_load_msrs, sizeof(struct vmcs_msr_entry) * 4);
        memcpy(exit_store_msrs, entry_load_msrs, sizeof(struct vmcs_msr_entry) * 4);
 
        
-       v3_get_msr(IA32_STAR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
-       v3_get_msr(IA32_LSTAR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
-       v3_get_msr(IA32_FMASK, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
-       v3_get_msr(IA32_KERN_GS_BASE, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));
+       v3_get_msr(IA32_STAR_MSR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
+       v3_get_msr(IA32_LSTAR_MSR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
+       v3_get_msr(IA32_FMASK_MSR, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
+       v3_get_msr(IA32_KERN_GS_BASE_MSR, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));
 
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));
 
+
+       v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
+
+
+       // IMPORTANT: These SYSCALL MSRs are currently not handled by hardware or cached
+       // We should really emulate these ourselves, or ideally include them in the MSR store area if there is room
+       v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
+       
+       v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
+       v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);
+       
+
     }    
 
     /* Sanity check ctrl/reg fields against hw_defaults */
@@ -557,6 +523,63 @@ int v3_deinit_vmx_vmcs(struct guest_info * core) {
 }
 
 
+
+#ifdef V3_CONFIG_CHECKPOINT
+/* 
+ * JRL: This is broken
+ */
+int v3_vmx_save_core(struct guest_info * core, void * ctx){
+    uint64_t vmcs_ptr = vmcs_store();
+
+    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE, (void *)vmcs_ptr);
+
+    return 0;
+}
+
+int v3_vmx_load_core(struct guest_info * core, void * ctx){
+    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
+    struct cr0_32 * shadow_cr0;
+    char vmcs[PAGE_SIZE_4KB];
+
+    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, vmcs);
+
+    vmcs_clear(vmx_info->vmcs_ptr_phys);
+    vmcs_load((addr_t)vmcs);
+
+    v3_vmx_save_vmcs(core);
+
+    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);
+
+
+    /* Get the CPU mode to set the guest_ia32e entry ctrl */
+
+    if (core->shdw_pg_mode == SHADOW_PAGING) {
+       if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
+           if (v3_activate_shadow_pt(core) == -1) {
+               PrintError("Failed to activate shadow page tables\n");
+               return -1;
+           }
+       } else {
+           if (v3_activate_passthrough_pt(core) == -1) {
+               PrintError("Failed to activate passthrough page tables\n");
+               return -1;
+           }
+       }
+    }
+
+    return 0;
+}
+#endif
+
+
+void v3_flush_vmx_vm_core(struct guest_info * core) {
+    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
+    vmcs_clear(vmx_info->vmcs_ptr_phys);
+    vmx_info->state = VMX_UNLAUNCHED;
+}
+
+
+
 static int update_irq_exit_state(struct guest_info * info) {
     struct vmx_exit_idt_vec_info idt_vec_info;
 
@@ -758,7 +781,9 @@ int v3_vmx_enter(struct guest_info * info) {
     v3_update_timers(info);
 
     if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
+       vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
+       vmx_info->state = VMX_UNLAUNCHED;
     }
 
     v3_vmx_restore_vmcs(info);
@@ -795,26 +820,28 @@ int v3_vmx_enter(struct guest_info * info) {
 
     if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
-       info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
     } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
     }
     
+
+
     //  PrintDebug("VMX Exit: ret=%d\n", ret);
 
     if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
-
         vmcs_read(VMCS_INSTR_ERR, &error);
 
        v3_enable_ints();
 
-        PrintError("VMENTRY Error: %d\n", error);
+       PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
     }
 
+
+
     // Immediate exit from VM time bookkeeping
     v3_time_exit_vm(info);
 
@@ -871,7 +898,7 @@ int v3_vmx_enter(struct guest_info * info) {
     v3_yield_cond(info);
 
     if (v3_handle_vmx_exit(info, &exit_info) == -1) {
-       PrintError("Error in VMX exit handler\n");
+       PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
     }
 
@@ -885,17 +912,25 @@ int v3_start_vmx_guest(struct guest_info * info) {
 
     if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
-       info->vm_info->run_state = VM_RUNNING;
     } else {
 
         PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);
 
         while (info->core_run_state == CORE_STOPPED) {
+
+           if (info->vm_info->run_state == VM_STOPPED) {
+               // The VM was stopped before this core was initialized. 
+               return 0;
+           }
+
             v3_yield(info);
             //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
         }
        
        PrintDebug("VMX core %u initialized\n", info->vcpu_id);
+
+       // We'll be paranoid about race conditions here
+       v3_wait_at_barrier(info);
     }
 
 
@@ -916,11 +951,40 @@ int v3_start_vmx_guest(struct guest_info * info) {
        }
 
        if (v3_vmx_enter(info) == -1) {
+
+           addr_t host_addr;
+            addr_t linear_addr = 0;
+            
+            info->vm_info->run_state = VM_ERROR;
+            
+            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 
+            
+            v3_print_guest_state(info);
+            
+            V3_Print("VMX core %u\n", info->vcpu_id); 
+
+            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
+            
+            if (info->mem_mode == PHYSICAL_MEM) {
+                v3_gpa_to_hva(info, linear_addr, &host_addr);
+            } else if (info->mem_mode == VIRTUAL_MEM) {
+                v3_gva_to_hva(info, linear_addr, &host_addr);
+            }
+            
+            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
+            
+            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
+            v3_dump_mem((uint8_t *)host_addr, 15);
+            
+            v3_print_stack(info);
+
+
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }
 
+       v3_wait_at_barrier(info);
 
 
        if (info->vm_info->run_state == VM_STOPPED) {