Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


first cut at nested paging
Jack Lange [Wed, 25 Mar 2009 21:36:44 +0000 (16:36 -0500)]
palacios/include/palacios/vmm.h
palacios/include/palacios/vmm_direct_paging.h
palacios/src/palacios/svm.c
palacios/src/palacios/svm_handler.c
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_direct_paging.c
palacios/src/palacios/vmm_direct_paging_32.h
palacios/src/palacios/vmm_direct_paging_32pae.h
palacios/src/palacios/vmm_direct_paging_64.h

index 9e9f812..4a4ed79 100644 (file)
@@ -273,6 +273,7 @@ struct v3_vm_config {
 
 
     int enable_profiling;
+    int enable_nested_paging;
 
     int use_ramdisk;
     void * ramdisk;
index 3bae7e3..22843fd 100644 (file)
@@ -29,6 +29,7 @@
 addr_t v3_create_direct_passthrough_pts(struct guest_info * guest_info);
 
 int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
 
 #endif // ! __V3VEE__
 
index d578a17..274d20f 100644 (file)
@@ -72,11 +72,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 
     guest_state->cpl = 0;
 
-    //ctrl_area->instrs.instrs.CR0 = 1;
-    ctrl_area->cr_reads.cr0 = 1;
-    ctrl_area->cr_writes.cr0 = 1;
-    //ctrl_area->cr_reads.cr4 = 1;
-    ctrl_area->cr_writes.cr4 = 1;
+
 
 
     /* Set up the efer to enable 64 bit page tables */
@@ -90,15 +86,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
       cr4->pae = 1;
       }
     */
-
     guest_state->efer |= EFER_MSR_svm_enable;
-    vm_info->guest_efer.value = 0x0LL;
-    
-    v3_hook_msr(vm_info, EFER_MSR, 
-               &v3_handle_efer_read,
-               &v3_handle_efer_write, 
-               vm_info);
-
 
 
     guest_state->rflags = 0x00000002; // The reserved bit is always 1
@@ -115,11 +103,12 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     ctrl_area->svm_instrs.MONITOR = 1;
     ctrl_area->svm_instrs.MWAIT_always = 1;
     ctrl_area->svm_instrs.MWAIT_if_armed = 1;
+    ctrl_area->instrs.INVLPGA = 1;
 
 
     ctrl_area->instrs.HLT = 1;
     // guest_state->cr0 = 0x00000001;    // PE 
-    ctrl_area->guest_ASID = 1;
+
 
   
     /*
@@ -235,28 +224,33 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     if (vm_info->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
+       ctrl_area->guest_ASID = 1;
        
-       
-       /* Testing 64 bit page tables for long paged real mode guests */
-       //    vm_info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(vm_info));
        vm_info->direct_map_pt = (addr_t)V3_PAddr((void *)v3_create_direct_passthrough_pts(vm_info));
-       /* End Test */
        
        vm_info->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
-       
        guest_state->cr3 = vm_info->direct_map_pt;
 
-
-       //PrintDebugPageTables((pde32_t*)(vm_info->shdw_pg_state.shadow_cr3.e_reg.low));
-       
+       ctrl_area->cr_reads.cr0 = 1;
+       ctrl_area->cr_writes.cr0 = 1;
+       //ctrl_area->cr_reads.cr4 = 1;
+       ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;
 
 
+       vm_info->guest_efer.value = 0x0LL;
+    
+       v3_hook_msr(vm_info, EFER_MSR, 
+                   &v3_handle_efer_read,
+                   &v3_handle_efer_write, 
+                   vm_info);
+
+
        ctrl_area->instrs.INVLPG = 1;
-       ctrl_area->instrs.INVLPGA = 1;
+
        
        ctrl_area->exceptions.pf = 1;
        
@@ -264,7 +258,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        
-       
        guest_state->g_pat = 0x7040600070406ULL;
        
        guest_state->cr0 |= 0x80000000;
@@ -272,6 +265,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     } else if (vm_info->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
+       ctrl_area->guest_ASID = 1;
        
        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;
@@ -279,7 +273,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));
        
        // Set the Nested Page Table pointer
-       vm_info->direct_map_pt = ((addr_t)v3_create_direct_passthrough_pts(vm_info) & ~0xfff);
+       vm_info->direct_map_pt = (addr_t)V3_PAddr((void *)v3_create_direct_passthrough_pts(vm_info));
        ctrl_area->N_CR3 = vm_info->direct_map_pt;
        
        //   ctrl_area->N_CR3 = Get_CR3();
@@ -557,6 +551,7 @@ static int has_svm_nested_paging() {
 void v3_init_SVM(struct v3_ctrl_ops * vmm_ops) {
     reg_ex_t msr;
     void * host_state;
+    extern v3_cpu_arch_t v3_cpu_type;
 
     // Enable SVM on the CPU
     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
@@ -578,7 +573,11 @@ void v3_init_SVM(struct v3_ctrl_ops * vmm_ops) {
     PrintDebug("Host State being saved at %p\n", (void *)(addr_t)host_state);
     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
 
-
+    if (has_svm_nested_paging() == 1) {
+       v3_cpu_type = V3_SVM_REV3_CPU;
+    } else {
+       v3_cpu_type = V3_SVM_CPU;
+    }
 
     // Setup the SVM specific vmm operations
     vmm_ops->init_guest = &init_svm_guest;
index aaed6c6..931bb99 100644 (file)
@@ -32,7 +32,7 @@
 #include <palacios/svm_msr.h>
 #include <palacios/vmm_profiler.h>
 #include <palacios/vmm_hypercall.h>
-
+#include <palacios/vmm_direct_paging.h>
 
 
 
@@ -247,12 +247,20 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        } 
-       case VMEXIT_NPF: 
+       case VMEXIT_NPF: {
+           addr_t fault_addr = guest_ctrl->exit_info2;
+           pf_error_t * error_code = (pf_error_t *)&(guest_ctrl->exit_info1);
 
-           PrintError("Currently unhandled Nested Page Fault\n");
-           return -1;
-               
+           if (info->shdw_pg_mode == NESTED_PAGING) {
+               if (v3_handle_nested_pagefault(info, fault_addr, *error_code) == -1) {
+                   return -1;
+               }
+           } else {
+               PrintError("Currently unhandled Nested Page Fault\n");
+               return -1;
+           }
            break;
+           }
        case VMEXIT_INVLPG: 
            if (info->shdw_pg_mode == SHADOW_PAGING) {
 #ifdef DEBUG_SHADOW_PAGING
index 631fe5c..c0c713e 100644 (file)
@@ -88,10 +88,12 @@ int v3_config_guest(struct guest_info * info, struct v3_vm_config * config_ptr)
     
     v3_init_hypercall_map(info);
     
-  
-    if (v3_cpu_type == V3_SVM_REV3_CPU) {
+    if ((v3_cpu_type == V3_SVM_REV3_CPU) && 
+       (config_ptr->enable_nested_paging == 1)) {
+       PrintDebug("Guest Page Mode: NESTED_PAGING\n");
        info->shdw_pg_mode = NESTED_PAGING;
     } else {
+       PrintDebug("Guest Page Mode: SHADOW_PAGING\n");
        v3_init_shadow_page_state(info);
        info->shdw_pg_mode = SHADOW_PAGING;
     }
index 783f696..821d7b3 100644 (file)
@@ -44,7 +44,7 @@ addr_t v3_create_direct_passthrough_pts(struct guest_info * info) {
 
 int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
     v3_vm_cpu_mode_t mode = v3_get_cpu_mode(info);
-    
+
     switch(mode) {
        case REAL:
        case PROTECTED:
@@ -62,3 +62,29 @@ int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr,
     }
     return -1;
 }
+
+
+
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+    // THIS IS VERY BAD
+    v3_vm_cpu_mode_t mode = LONG;
+
+    switch(mode) {
+       case REAL:
+       case PROTECTED:
+           return handle_passthrough_pagefault_32(info, fault_addr, error_code);
+
+       case PROTECTED_PAE:
+           return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
+
+       case LONG:
+       case LONG_32_COMPAT:
+           return handle_passthrough_pagefault_64(info, fault_addr, error_code);           
+       
+       default:
+           PrintError("Unknown CPU Mode\n");
+           break;
+    }
+    return -1;
+}
+
index 4df18a9..6645f86 100644 (file)
@@ -33,7 +33,7 @@ static inline int handle_passthrough_pagefault_32(struct guest_info * info,
                                                  addr_t fault_addr, 
                                                  pf_error_t error_code) {
     // Check to see if pde and pte exist (create them if not)
-    pde32_t * pde = CR3_TO_PDE32_VA(info->ctrl_regs.cr3);
+    pde32_t * pde = NULL;
     pte32_t * pte = NULL;
     addr_t host_addr = 0;
     
@@ -51,6 +51,14 @@ static inline int handle_passthrough_pagefault_32(struct guest_info * info,
     
     host_addr = v3_get_shadow_addr(region, fault_addr);
     
+    // Lookup the correct PDE address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+       pde = CR3_TO_PDE32_VA(info->ctrl_regs.cr3);
+    } else {
+       pde = CR3_TO_PDE32_VA(info->direct_map_pt);
+    }
+
+
     // Fix up the PDE entry
     if (pde[pde_index].present == 0) {
        pte = (pte32_t *)create_generic_pt_page();
index 033c69d..27cc9b3 100644 (file)
@@ -31,7 +31,7 @@
 static inline int handle_passthrough_pagefault_32pae(struct guest_info * info, 
                                                     addr_t fault_addr, 
                                                     pf_error_t error_code) {
-    pdpe32pae_t * pdpe = CR3_TO_PDPE32PAE_VA(info->ctrl_regs.cr3);
+    pdpe32pae_t * pdpe = NULL;
     pde32pae_t * pde = NULL;
     pte32pae_t * pte = NULL;
     addr_t host_addr = 0;
@@ -51,6 +51,13 @@ static inline int handle_passthrough_pagefault_32pae(struct guest_info * info,
 
     host_addr = v3_get_shadow_addr(region, fault_addr);
 
+    // Lookup the correct PDPE address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+       pdpe = CR3_TO_PDPE32PAE_VA(info->ctrl_regs.cr3);
+    } else {
+       pdpe = CR3_TO_PDPE32PAE_VA(info->direct_map_pt);
+    }
+
     // Fix up the PDPE entry
     if (pdpe[pdpe_index].present == 0) {
        pde = (pde32pae_t *)create_generic_pt_page();
index a840c2b..d79693b 100644 (file)
 static inline int handle_passthrough_pagefault_64(struct guest_info * info, 
                                                     addr_t fault_addr, 
                                                     pf_error_t error_code) {
-  pml4e64_t * pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
-  pdpe64_t * pdpe = NULL;
-  pde64_t * pde = NULL;
-  pte64_t * pte = NULL;
-  addr_t host_addr = 0;
-
-  int pml_index = PML4E64_INDEX(fault_addr);
-  int pdpe_index = PDPE64_INDEX(fault_addr);
-  int pde_index = PDE64_INDEX(fault_addr);
-  int pte_index = PTE64_INDEX(fault_addr);
-
-  struct v3_shadow_region * region =  v3_get_shadow_region(info, fault_addr);
-  
-  if ((region == NULL) || 
-      (region->host_type == SHDW_REGION_INVALID)) {
-    PrintError("Invalid region in passthrough page fault 64, addr=%p\n", 
-              (void *)fault_addr);
-    return -1;
-  }
-
-  host_addr = v3_get_shadow_addr(region, fault_addr);
-
-  //Fix up the PML entry
-  if (pml[pml_index].present == 0) {
-    pdpe = (pdpe64_t *)create_generic_pt_page();
-   
-    pml[pml_index].present = 1;
-    // Set default PML Flags...
-    pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));    
-  } else {
-    pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
-  }
-
-  // Fix up the PDPE entry
-  if (pdpe[pdpe_index].present == 0) {
-    pde = (pde64_t *)create_generic_pt_page();
-   
-    pdpe[pdpe_index].present = 1;
-    // Set default PDPE Flags...
-    pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));    
-  } else {
-    pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
-  }
+    pml4e64_t * pml = NULL;
+    pdpe64_t * pdpe = NULL;
+    pde64_t * pde = NULL;
+    pte64_t * pte = NULL;
+    addr_t host_addr = 0;
 
+    int pml_index = PML4E64_INDEX(fault_addr);
+    int pdpe_index = PDPE64_INDEX(fault_addr);
+    int pde_index = PDE64_INDEX(fault_addr);
+    int pte_index = PTE64_INDEX(fault_addr);
 
-  // Fix up the PDE entry
-  if (pde[pde_index].present == 0) {
-    pte = (pte64_t *)create_generic_pt_page();
 
-    pde[pde_index].present = 1;
-    pde[pde_index].writable = 1;
-    pde[pde_index].user_page = 1;
-
-    pde[pde_index].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
-  } else {
-    pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
-  }
+    struct v3_shadow_region * region =  v3_get_shadow_region(info, fault_addr);
+  
+    if ((region == NULL) || 
+       (region->host_type == SHDW_REGION_INVALID)) {
+       PrintError("Invalid region in passthrough page fault 64, addr=%p\n", 
+                  (void *)fault_addr);
+       return -1;
+    }
 
+    host_addr = v3_get_shadow_addr(region, fault_addr);
 
-  // Fix up the PTE entry
-  if (pte[pte_index].present == 0) {
-    pte[pte_index].user_page = 1;
+    // Lookup the correct PML address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+       pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
+    } else {
+       pml = CR3_TO_PML4E64_VA(info->direct_map_pt);
+    }
 
-    if (region->host_type == SHDW_REGION_ALLOCATED) {
-      // Full access
-      pte[pte_index].present = 1;
-      pte[pte_index].writable = 1;
+    //Fix up the PML entry
+    if (pml[pml_index].present == 0) {
+       pdpe = (pdpe64_t *)create_generic_pt_page();
+   
+       pml[pml_index].present = 1;
+       // Set default PML Flags...
+       pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));    
+    } else {
+       pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
+    }
 
-      pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+    // Fix up the PDPE entry
+    if (pdpe[pdpe_index].present == 0) {
+       pde = (pde64_t *)create_generic_pt_page();
+       
+       pdpe[pdpe_index].present = 1;
+       // Set default PDPE Flags...
+       pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));    
+    } else {
+       pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
+    }
 
-    } else if (region->host_type == SHDW_REGION_WRITE_HOOK) {
-      // Only trap writes
-     pte[pte_index].present = 1; 
-     pte[pte_index].writable = 0;
 
-     pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+    // Fix up the PDE entry
+    if (pde[pde_index].present == 0) {
+       pte = (pte64_t *)create_generic_pt_page();
+       
+       pde[pde_index].present = 1;
+       pde[pde_index].writable = 1;
+       pde[pde_index].user_page = 1;
+       
+       pde[pde_index].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
+    } else {
+       pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
+    }
 
-    } else if (region->host_type == SHDW_REGION_FULL_HOOK) {
-      // trap all accesses
-      return v3_handle_mem_full_hook(info, fault_addr, fault_addr, region, error_code);
 
-    } else {
-      PrintError("Unknown Region Type...\n");
-      return -1;
+    // Fix up the PTE entry
+    if (pte[pte_index].present == 0) {
+       pte[pte_index].user_page = 1;
+       
+       if (region->host_type == SHDW_REGION_ALLOCATED) {
+           // Full access
+           pte[pte_index].present = 1;
+           pte[pte_index].writable = 1;
+
+           pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+           
+       } else if (region->host_type == SHDW_REGION_WRITE_HOOK) {
+           // Only trap writes
+           pte[pte_index].present = 1; 
+           pte[pte_index].writable = 0;
+
+           pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+
+       } else if (region->host_type == SHDW_REGION_FULL_HOOK) {
+           // trap all accesses
+           return v3_handle_mem_full_hook(info, fault_addr, fault_addr, region, error_code);
+
+       } else {
+           PrintError("Unknown Region Type...\n");
+           return -1;
+       }
     }
-  }
    
-  if ( (region->host_type == SHDW_REGION_WRITE_HOOK) && 
-       (error_code.write == 1) ) {
-    return v3_handle_mem_wr_hook(info, fault_addr, fault_addr, region, error_code);
-  }
+    if ( (region->host_type == SHDW_REGION_WRITE_HOOK) && 
+        (error_code.write == 1) ) {
+       return v3_handle_mem_wr_hook(info, fault_addr, fault_addr, region, error_code);
+    }
 
-  return 0;
+    return 0;
 }