Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.


correctly handle NMI exits on VMX architectures
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c
index a024fb7..c341982 100644
--- a/palacios/src/palacios/vmx.c
+++ b/palacios/src/palacios/vmx.c
@@ -34,6 +34,7 @@
 #include <palacios/vmm_decoder.h>
 #include <palacios/vmm_barrier.h>
 #include <palacios/vmm_timeout.h>
+#include <palacios/vmm_debug.h>
 
 #ifdef V3_CONFIG_CHECKPOINT
 #include <palacios/vmm_checkpoint.h>
@@ -91,11 +92,17 @@ static int inline check_vmcs_read(vmcs_field_t field, void * val) {
 
 
 static addr_t allocate_vmcs() {
+    void *temp;
     struct vmcs_data * vmcs_page = NULL;
 
     PrintDebug("Allocating page\n");
 
-    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
+    temp = V3_AllocPages(1);
+    if (!temp) { 
+       PrintError("Cannot allocate VMCS\n");
+       return -1;
+    }
+    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
     memset(vmcs_page, 0, 4096);
 
     vmcs_page->revision = hw_info.basic_info.revision;
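
A note on the failure path added above: allocate_vmcs() is declared to return an addr_t, so the -1 it returns on allocation failure reaches callers as (addr_t)-1. A minimal caller-side sketch (the caller shown here is illustrative, not code from this commit):

  addr_t vmcs_pa = allocate_vmcs();      /* (addr_t)-1 signals failure */
  if (vmcs_pa == (addr_t)-1) {
      PrintError("VMCS allocation failed\n");
      return -1;                         /* propagate the error */
  }
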
@@ -177,9 +184,11 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
 
     /* Add external interrupts, NMI exiting, and virtual NMI */
     vmx_state->pin_ctrls.nmi_exit = 1;
+    vmx_state->pin_ctrls.virt_nmi = 1;
     vmx_state->pin_ctrls.ext_int_exit = 1;
 
 
+
     /* We enable the preemption timer by default to measure accurate guest time */
     if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
@@ -187,10 +196,17 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        vmx_state->exit_ctrls.save_preempt_timer = 1;
     }
 
+    // we want it to use this when halting
     vmx_state->pri_proc_ctrls.hlt_exit = 1;
 
+    // cpuid tells it that it does not have these instructions
+    vmx_state->pri_proc_ctrls.monitor_exit = 1;
+    vmx_state->pri_proc_ctrls.mwait_exit = 1;
 
+    // we don't need to handle a pause, although this is where
+    // we could pull out of a spin lock acquire or schedule to find its partner
     vmx_state->pri_proc_ctrls.pause_exit = 0;
+
     vmx_state->pri_proc_ctrls.tsc_offset = 1;
 #ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
     vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
@@ -232,6 +248,11 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
     // Setup Guests initial PAT field
     vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
 
+    // Capture CR8 mods so that we can keep the apic_tpr correct
+    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
+    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;
+
+
     /* Setup paging */
     if (core->shdw_pg_mode == SHADOW_PAGING) {
         PrintDebug("Creating initial shadow page table\n");
@@ -399,7 +420,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
 #define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
-       //((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
+       ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
 
        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
@@ -517,6 +538,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state)
 #endif
 
 
+
  
 
     if (v3_update_vmcs_ctrl_fields(core)) {
@@ -546,6 +568,12 @@ static void __init_vmx_vmcs(void * arg) {
     int vmx_ret = 0;
     
     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
+
+    if (!vmx_state) {
+       PrintError("Unable to allocate in initializing vmx vmcs\n");
+       return;
+    }
+
     memset(vmx_state, 0, sizeof(struct vmx_data));
 
     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
@@ -643,8 +671,11 @@ int v3_vmx_save_core(struct guest_info * core, void * ctx){
     struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
 
     // note that the vmcs pointer is an HPA, but we need an HVA
-    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, V3_VAddr((void*)
-                                                           (vmx_info->vmcs_ptr_phys)));
+    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
+                     V3_VAddr((void*) (vmx_info->vmcs_ptr_phys))) ==-1) {
+       PrintError("Could not save vmcs data for VMX\n");
+       return -1;
+    }
 
     return 0;
 }
@@ -655,8 +686,17 @@ int v3_vmx_load_core(struct guest_info * core, void * ctx){
     addr_t vmcs_page_paddr;  //HPA
 
     vmcs_page_paddr = (addr_t) V3_AllocPages(1);
+    
+    if (!vmcs_page_paddr) { 
+       PrintError("Could not allocate space for a vmcs in VMX\n");
+       return -1;
+    }
 
-    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, V3_VAddr((void *)vmcs_page_paddr));
+    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
+                     V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
+       PrintError("Could not load vmcs data for VMX\n");
+       return -1;
+    }
 
     vmcs_clear(vmx_info->vmcs_ptr_phys);
 
@@ -880,7 +920,7 @@ int
 v3_vmx_config_tsc_virtualization(struct guest_info * info) {
     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
 
-    if (info->time_state.time_flags & V3_TIME_TRAP_RDTSC) {
+    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if  (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
@@ -894,7 +934,11 @@ v3_vmx_config_tsc_virtualization(struct guest_info * info) {
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
 
-        tsc_offset = v3_tsc_host_offset(&info->time_state);
+       if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
+           tsc_offset = 0;
+       } else {
+            tsc_offset = v3_tsc_host_offset(&info->time_state);
+       }
         tsc_offset_high = (uint32_t)(( tsc_offset >> 32) & 0xffffffff);
         tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);
 
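
The pass-through case above works because, with the VMX "use TSC offsetting" control enabled, the hardware reports the guest TSC as the host TSC plus the VMCS offset, so a zero offset hands the guest the raw host counter. A minimal sketch of that relationship (illustrative only):

  /* RDTSC in the guest returns host_tsc + tsc_offset (a signed 64-bit value),
   * so VM_TIME_TSC_PASSTHROUGH simply selects tsc_offset == 0. */
  static inline uint64_t guest_tsc_view(uint64_t host_tsc, int64_t tsc_offset) {
      return host_tsc + (uint64_t)tsc_offset;
  }
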
@@ -919,18 +963,18 @@ int v3_vmx_enter(struct guest_info * info) {
     uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info);
-
-    // disable global interrupts for vm state transition
-    v3_disable_ints();
+    v3_yield_cond(info,-1);
 
     // Update timer devices late after being in the VM so that as much 
     // of the time in the VM is accounted for as possible. Also do it before
     // updating IRQ entry state so that any interrupts the timers raise get 
-    // handled on the next VM entry. Must be done with interrupts disabled.
-    v3_advance_time(info);
+    // handled on the next VM entry.
+    v3_advance_time(info, NULL);
     v3_update_timers(info);
 
+    // disable global interrupts for vm state transition
+    v3_disable_ints();
+
     if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
@@ -955,12 +999,9 @@ int v3_vmx_enter(struct guest_info * info) {
     }
 
 
-    // Perform last-minute time bookkeeping prior to entering the VM
-    v3_time_enter_vm(info);
+    // Perform last-minute time setup prior to entering the VM
     v3_vmx_config_tsc_virtualization(info);
 
-    
-
     if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
         PrintError("Could not write host state\n");
@@ -1027,8 +1068,7 @@ int v3_vmx_enter(struct guest_info * info) {
     }
 
     // Immediate exit from VM time bookkeeping
-    v3_time_exit_vm(info, &guest_cycles);
-
+    v3_advance_time(info, &guest_cycles);
 
     /* Update guest state */
     v3_vmx_save_vmcs(info);
@@ -1076,11 +1116,25 @@ int v3_vmx_enter(struct guest_info * info) {
 #endif
     }
 
+
+    // Lastly we check for an NMI exit, and reinject if so
+    {
+       struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info.exit_reason);
+
+       if (basic_info->reason == VMX_EXIT_INFO_EXCEPTION_OR_NMI) {
+           if ((uint8_t)exit_info.int_info == 2) {
+               asm("int $2");
+           }
+       }
+    }
+
     // reenable global interrupts after vm exit
     v3_enable_ints();
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info);
+    v3_yield_cond(info,-1);
+    v3_advance_time(info, NULL);
+    v3_update_timers(info);
 
     if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
@@ -1113,7 +1167,7 @@ int v3_start_vmx_guest(struct guest_info * info) {
                return 0;
            }
 
-            v3_yield(info);
+            v3_yield(info,-1);
             //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
         }