Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


SVM Bug Fixes and Enhancements
Peter Dinda [Sun, 2 Aug 2015 22:59:49 +0000 (17:59 -0500)]
- Correct behavior of exit during injection of SW intr / exception / other events (BIG ONE)
- Sanity-checked deallocations
- VMCB, etc, modernized to current versions
- Sanity-check to see if we are on old hardware that doesn't do nrip / SW intr
- Spot interrupt shadows

palacios/include/palacios/vmcb.h
palacios/src/palacios/svm.c
palacios/src/palacios/svm_io.c
palacios/src/palacios/svm_msr.c

index 5effea4..7b7cec8 100644 (file)
 #include <palacios/vm_guest.h>
 
 #define VMCB_CTRL_AREA_OFFSET                   0x0
+#define VMCB_CTRL_AREA_SIZE                     0x400
 #define VMCB_STATE_SAVE_AREA_OFFSET             0x400
-
+#define VMCB_STATE_SAVE_AREA_SIZE               0x298
+#define VMCB_END_OFFSET                         (VMCB_CTRL_AREA_SIZE+VMCB_STATE_SAVE_AREA_SIZE)
+#define VMCB_TOTAL_SIZE                         0x1000
 
 #define GET_VMCB_CTRL_AREA(page)         (page + VMCB_CTRL_AREA_OFFSET)
 #define GET_VMCB_SAVE_STATE_AREA(page)   (page + VMCB_STATE_SAVE_AREA_OFFSET)
@@ -162,7 +165,8 @@ struct SVM_Instr_Intercepts {
     uint_t MONITOR    : 1;
     uint_t MWAIT_always : 1;
     uint_t MWAIT_if_armed : 1;
-    uint_t reserved  : 19;  // Should be 0
+    uint_t XSETBV  : 1;
+    uint_t reserved  : 18;  // Should be 0
 } __attribute__((packed));
 
 
@@ -174,7 +178,8 @@ struct Guest_Control {
     uint_t V_IGN_TPR  : 1;
     uint_t rsvd2      : 3;  // Should be 0
     uint_t V_INTR_MASKING : 1;
-    uint_t rsvd3      : 7;  // Should be 0
+    uint_t rsvd3      : 6;  // Should be 0
+    uint_t AVIC_enable : 1;
     uchar_t V_INTR_VECTOR;
     uint_t rsvd4      : 24;  // Should be 0
 } __attribute__((packed));
@@ -196,72 +201,104 @@ struct Interrupt_Info {
 
 
 struct VMCB_Control_Area {
-    // offset 0x0
+    // offset 0x000
     struct Ctrl_Registers cr_reads;
     struct Ctrl_Registers cr_writes;
+    // offset 0x004
     struct Debug_Registers dr_reads;
     struct Debug_Registers dr_writes;
+    // offset 0x008
     struct Exception_Vectors exceptions;
+    // offset 0x00c
     struct Instr_Intercepts instrs;
+    // offset 0x010
     struct SVM_Instr_Intercepts svm_instrs;
 
-    uchar_t rsvd1[44];  // Should be 0
+    // offset 0x014
+    uchar_t rsvd1[40];  // Should be 0
+
+    // offset 0x03c
+    uint16_t pause_filter_threshold;
+    uint16_t pause_filter_count;
 
     // offset 0x040
     ullong_t IOPM_BASE_PA;
     ullong_t MSRPM_BASE_PA;
     ullong_t TSC_OFFSET;
-
+    
+    // offset 0x058
     uint_t guest_ASID;
     uchar_t TLB_CONTROL;
 
     uchar_t rsvd2[3];  // Should be 0
 
+    // offset 0x060
     struct Guest_Control guest_ctrl;
   
-    uint_t interrupt_shadow  : 1;
-    uint_t rsvd3             : 31;  // Should be 0
-    uint_t rsvd4;  // Should be 0
+    // offset 0x068
+    uint_t   interrupt_shadow  : 1;
+    uint64_t rsvd3             : 63;  // Should be 0
 
+    // offset 0x070
     ullong_t exit_code;
     ullong_t exit_info1;
     ullong_t exit_info2;
 
-    /* This could be a typo in the manual....
-     * It doesn't actually say that there is a reserved bit
-     * But it does say that the EXITINTINFO field is in bits 63-1
-     * ALL other occurances mention a 1 bit reserved field
-     */
-    //  uint_t rsvd5             : 1;
-    //ullong_t exit_int_info   : 63;
-    /* ** */
-
-    // AMD Manual 2, pg 391, sect: 15.19
+    // offset 0x088
     struct Interrupt_Info exit_int_info;
 
-    //  uint_t NP_ENABLE         : 1;
-    //ullong_t rsvd6           : 63;  // Should be 0 
-    ullong_t NP_ENABLE;
+    // offset 0x090
+    uint_t NP_ENABLE : 1;
+    uint64_t rsvd4 : 63;
 
-    uchar_t rsvd7[16];  // Should be 0
+    // offet 0x098
+    uint64_t AVIC_APIC_BAR: 52;
+    uint64_t rsvd5 : 12;
 
-    // Offset 0xA8
-    struct Interrupt_Info EVENTINJ;
+    // offset 0x0a0
+    uchar_t rsvd6[8];  // Should be 0
 
+    // offset 0x0a8
+    struct Interrupt_Info EVENTINJ;
 
-    /* This could be a typo in the manual....
-     * It doesn't actually say that there is a reserved bit
-     * But it does say that the EXITINTINFO field is in bits 63-1
-     * ALL other occurances mention a 1 bit reserved field
-     */
-    //  uint_t rsvd8              : 1;
-    //ullong_t N_CR3            : 63;
+    // offset 0x0b0
     ullong_t N_CR3;
-    /* ** */
-
 
+    // offset 0x0b8
     uint_t LBR_VIRTUALIZATION_ENABLE : 1;
-    ullong_t rsvd9            : 63;   // Should be 0
+    ullong_t rsvd7            : 63;   // Should be 0
+
+    // offset 0x0c0
+    uint32_t clean_bits;         // used for VMCB caching  
+    uint32_t rsvd8;
+    
+    // offset 0x0c8
+    uint64_t nrip;         
+
+    // offset 0x0d0
+    // fetch of instruction
+    uint8_t  num_ifetch_bytes; 
+    uint8_t  ifetch_bytes[15];
+    
+    // offset 0x0e0
+    uint64_t AVIC_APIC_backing_page : 52; // page-aligned
+    uint64_t rsvd9 : 12;
+    
+    // offset 0x0e8
+    uint64_t rsvd10;
+
+    // offset 0x0f0
+    uint64_t AVIC_logical_table : 52; // page-aligned
+    uint64_t rsvd11 : 12;
+    
+    // offset 0xf8
+    uint8_t AVIC_PHYSICAL_MAX_INDEX;
+    uint8_t rsvd12: 4;
+    uint64_t AVIC_PHYSICAL_TABLE_PTR: 40; // page-aligned
+    uint64_t rsvd13: 12; 
+
+    // offset 0x100 space from here to 0x3ff should be zero
+    uint8_t  rsvd_tail[VMCB_CTRL_AREA_SIZE-0x100];
 
 } __attribute__((packed));
 
@@ -371,6 +408,8 @@ struct VMCB_State_Save_Area {
     ullong_t lastexcpto; // Guest LastExceptionToIP MSR 
     //   -- only used if the LBR registers are virtualized
 
+    // offset 0x298
+    // Remainder to end of page is reserved and zero
 } __attribute__((packed));
 
 
index a007201..6ecdb12 100644 (file)
@@ -165,7 +165,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->svm_instrs.MONITOR = 1;
     ctrl_area->svm_instrs.MWAIT_always = 1;
     ctrl_area->svm_instrs.MWAIT_if_armed = 1;
-    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in asid... why?
+    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in asid... AMD ERRATA
     ctrl_area->instrs.CPUID = 1;
 
     ctrl_area->instrs.HLT = 1;
@@ -364,8 +364,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;
 
-       PrintDebug(core->vm_info, core, "NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));
-
        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
@@ -439,7 +437,7 @@ int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
 
 
 int v3_deinit_svm_vmcb(struct guest_info * core) {
-    if (core->vmm_data) { 
+    if (core && core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
     }
     return 0;
@@ -625,6 +623,15 @@ static int update_irq_exit_state(struct guest_info * info) {
 static int update_irq_entry_state(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
 
+    if (guest_ctrl->exit_int_info.valid) {
+       // We need to complete the previous injection
+       guest_ctrl->EVENTINJ = guest_ctrl->exit_int_info;
+
+       PrintDebug(info->vm_info,info,"Continuing injection of event - eventinj=0x%llx\n",*(uint64_t*)&guest_ctrl->EVENTINJ);
+
+       return 0;
+    }
+
 
     if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
@@ -632,20 +639,24 @@ static int update_irq_entry_state(struct guest_info * info) {
     }
     
     if (v3_excp_pending(info)) {
+
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
-       
+       guest_ctrl->EVENTINJ.vector = excp;
+
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
 #endif
+       } else {
+           guest_ctrl->EVENTINJ.error_code = 0;
+           guest_ctrl->EVENTINJ.ev = 0;
        }
-       
-       guest_ctrl->EVENTINJ.vector = excp;
-       
+
+       guest_ctrl->EVENTINJ.rsvd = 0;
        guest_ctrl->EVENTINJ.valid = 1;
 
 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
@@ -657,7 +668,9 @@ static int update_irq_entry_state(struct guest_info * info) {
 #endif
 
        v3_injecting_excp(info, excp);
+
     } else if (info->intr_core_state.irq_started == 1) {
+
 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
 #endif
@@ -699,20 +712,33 @@ static int update_irq_entry_state(struct guest_info * info) {
                
            }
            case V3_NMI:
+#ifdef V3_CONFIG_DEBUG_INTERRUPTS
+               PrintDebug(info->vm_info, info, "Injecting NMI\n");
+#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
+               guest_ctrl->EVENTINJ.ev = 0;
+               guest_ctrl->EVENTINJ.error_code = 0;
+               guest_ctrl->EVENTINJ.rsvd = 0;
+               guest_ctrl->EVENTINJ.valid = 1;
+
                break;
+
            case V3_SOFTWARE_INTR:
-               guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
 #endif
+               guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
+               guest_ctrl->EVENTINJ.ev = 0;
+               guest_ctrl->EVENTINJ.error_code = 0;
+               guest_ctrl->EVENTINJ.rsvd = 0;
                guest_ctrl->EVENTINJ.valid = 1;
             
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
+
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
@@ -871,6 +897,12 @@ int v3_svm_enter(struct guest_info * info) {
 #endif
 
 
+       if (guest_ctrl->EVENTINJ.valid && guest_ctrl->interrupt_shadow) { 
+#ifdef V3_CONFIG_DEBUG_INTERRUPTS
+           PrintDebug(info->vm_info,info,"Event injection during an interrupt shadow\n");
+#endif
+       }
+
        rdtscll(entry_tsc);
 
        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
@@ -1132,6 +1164,7 @@ int v3_start_svm_guest(struct guest_info * info) {
        
 
        if (info->vm_info->run_state == VM_STOPPED) {
+           PrintDebug(info->vm_info,info,"Stopping core as VM is stopped\n");
            info->core_run_state = CORE_STOPPED;
            break;
        }
@@ -1229,6 +1262,10 @@ int v3_is_svm_capable() {
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
 
+           if (!(edx & 0x8)) { 
+             PrintError(VM_NONE,VCORE_NONE, "WARNING: NO SVM SUPPORT FOR NRIP - SW INTR INJECTION WILL LIKELY FAIL\n");
+           }
+
            return 1;
        }
     }
index 91ba659..396f6ce 100644 (file)
@@ -68,7 +68,9 @@ int v3_init_svm_io_map(struct v3_vm_info * vm) {
 }
 
 int v3_deinit_svm_io_map(struct v3_vm_info * vm) {
-    V3_FreePages(V3_PAddr(vm->io_map.arch_data), 3);
+    if (vm->io_map.arch_data) { 
+       V3_FreePages(V3_PAddr(vm->io_map.arch_data), 3);
+    }
     return 0;
 }
 
index 08b1569..bdc8569 100644 (file)
@@ -104,6 +104,8 @@ int v3_init_svm_msr_map(struct v3_vm_info * vm) {
 }
 
 int v3_deinit_svm_msr_map(struct v3_vm_info * vm) {
-    V3_FreePages(V3_PAddr(vm->msr_map.arch_data), 2);
+    if (vm->msr_map.arch_data) { 
+       V3_FreePages(V3_PAddr(vm->msr_map.arch_data), 2);
+    }
     return 0;
 }