From: Jack Lange
Date: Wed, 25 Mar 2009 21:36:44 +0000 (-0500)
Subject: first cut at nested paging
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=1fe82881720f7f9f64f789871f763aca93b47a7e

first cut at nested paging
---

diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h
index 9e9f812..4a4ed79 100644
--- a/palacios/include/palacios/vmm.h
+++ b/palacios/include/palacios/vmm.h
@@ -273,6 +273,7 @@ struct v3_vm_config {
     int enable_profiling;
+    int enable_nested_paging;
 
     int use_ramdisk;
     void * ramdisk;
diff --git a/palacios/include/palacios/vmm_direct_paging.h b/palacios/include/palacios/vmm_direct_paging.h
index 3bae7e3..22843fd 100644
--- a/palacios/include/palacios/vmm_direct_paging.h
+++ b/palacios/include/palacios/vmm_direct_paging.h
@@ -29,6 +29,7 @@ addr_t v3_create_direct_passthrough_pts(struct guest_info * guest_info);
 
 int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
 
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
 
 #endif // ! __V3VEE__
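
These two headers make nested paging an opt-in, per-guest option: the embedder requests it through struct v3_vm_config, and v3_handle_nested_pagefault() becomes the entry point for servicing nested page faults. A minimal caller-side sketch; only the two config fields and v3_config_guest() appear in this diff, everything else here is illustrative:

    #include <palacios/vmm.h>

    /* Illustrative sketch: request nested paging for a guest. Fields not
     * shown in this diff (memory layout, ramdisk, ...) are elided. */
    void config_guest_sketch(struct guest_info * info) {
        struct v3_vm_config cfg = {0};     // zero-init: optional features off

        cfg.enable_profiling = 0;
        cfg.enable_nested_paging = 1;      // only honored on NP-capable CPUs

        /* ... fill in guest memory, ramdisk, etc. ... */

        v3_config_guest(info, &cfg);       // picks NESTED_PAGING or SHADOW_PAGING
    }
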
diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c
index d578a17..274d20f 100644
--- a/palacios/src/palacios/svm.c
+++ b/palacios/src/palacios/svm.c
@@ -72,11 +72,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     guest_state->cpl = 0;
 
-    //ctrl_area->instrs.instrs.CR0 = 1;
-    ctrl_area->cr_reads.cr0 = 1;
-    ctrl_area->cr_writes.cr0 = 1;
-    //ctrl_area->cr_reads.cr4 = 1;
-    ctrl_area->cr_writes.cr4 = 1;
+
 
     /* Set up the efer to enable 64 bit page tables */
@@ -90,15 +86,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 	cr4->pae = 1;
     }
     */
-    guest_state->efer |= EFER_MSR_svm_enable;
-    vm_info->guest_efer.value = 0x0LL;
-
-    v3_hook_msr(vm_info, EFER_MSR,
-		&v3_handle_efer_read,
-		&v3_handle_efer_write,
-		vm_info);
-
     guest_state->rflags = 0x00000002; // The reserved bit is always 1
@@ -115,11 +103,12 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     ctrl_area->svm_instrs.MONITOR = 1;
     ctrl_area->svm_instrs.MWAIT_always = 1;
     ctrl_area->svm_instrs.MWAIT_if_armed = 1;
+    ctrl_area->instrs.INVLPGA = 1;
 
     ctrl_area->instrs.HLT = 1;
     // guest_state->cr0 = 0x00000001;    // PE
-    ctrl_area->guest_ASID = 1;
+
 
     /*
@@ -235,28 +224,33 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 
     if (vm_info->shdw_pg_mode == SHADOW_PAGING) {
 	PrintDebug("Creating initial shadow page table\n");
+	ctrl_area->guest_ASID = 1;
 
-
-	/* Testing 64 bit page tables for long paged real mode guests */
-	// vm_info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(vm_info));
 	vm_info->direct_map_pt = (addr_t)V3_PAddr((void *)v3_create_direct_passthrough_pts(vm_info));
-	/* End Test */
 
 	vm_info->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
 	PrintDebug("Created\n");
 
-	guest_state->cr3 = vm_info->direct_map_pt;
-
-	//PrintDebugPageTables((pde32_t*)(vm_info->shdw_pg_state.shadow_cr3.e_reg.low));
-
+	ctrl_area->cr_reads.cr0 = 1;
+	ctrl_area->cr_writes.cr0 = 1;
+	//ctrl_area->cr_reads.cr4 = 1;
+	ctrl_area->cr_writes.cr4 = 1;
 	ctrl_area->cr_reads.cr3 = 1;
 	ctrl_area->cr_writes.cr3 = 1;
 
+	vm_info->guest_efer.value = 0x0LL;
+
+	v3_hook_msr(vm_info, EFER_MSR,
+		    &v3_handle_efer_read,
+		    &v3_handle_efer_write,
+		    vm_info);
+
+
 	ctrl_area->instrs.INVLPG = 1;
-	ctrl_area->instrs.INVLPGA = 1;
+
 
 	ctrl_area->exceptions.pf = 1;
@@ -264,7 +258,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 	/* We need to fix this */
 	ctrl_area->TLB_CONTROL = 1;
 
-	guest_state->g_pat = 0x7040600070406ULL;
 
 	guest_state->cr0 |= 0x80000000;
@@ -272,6 +265,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     } else if (vm_info->shdw_pg_mode == NESTED_PAGING) {
 	// Flush the TLB on entries/exits
 	ctrl_area->TLB_CONTROL = 1;
+	ctrl_area->guest_ASID = 1;
 
 	// Enable Nested Paging
 	ctrl_area->NP_ENABLE = 1;
@@ -279,7 +273,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 	PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));
 
 	// Set the Nested Page Table pointer
-	vm_info->direct_map_pt = ((addr_t)v3_create_direct_passthrough_pts(vm_info) & ~0xfff);
+	vm_info->direct_map_pt = (addr_t)V3_PAddr((void *)v3_create_direct_passthrough_pts(vm_info));
 	ctrl_area->N_CR3 = vm_info->direct_map_pt;
 
 	//   ctrl_area->N_CR3 = Get_CR3();
@@ -557,6 +551,7 @@ static int has_svm_nested_paging() {
 void v3_init_SVM(struct v3_ctrl_ops * vmm_ops) {
     reg_ex_t msr;
     void * host_state;
+    extern v3_cpu_arch_t v3_cpu_type;
 
     // Enable SVM on the CPU
     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
@@ -578,7 +573,11 @@ void v3_init_SVM(struct v3_ctrl_ops * vmm_ops) {
     PrintDebug("Host State being saved at %p\n", (void *)(addr_t)host_state);
     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
 
-
+    if (has_svm_nested_paging() == 1) {
+	v3_cpu_type = V3_SVM_REV3_CPU;
+    } else {
+	v3_cpu_type = V3_SVM_CPU;
+    }
 
     // Setup the SVM specific vmm operations
     vmm_ops->init_guest = &init_svm_guest;
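
The VMCB changes split Init_VMCB_BIOS() into two regimes: under shadow paging the VMM intercepts CR0/CR3/CR4 accesses, hooks the EFER MSR, and traps guest page faults; under nested paging it only sets NP_ENABLE, points N_CR3 at the passthrough tables, and leaves the guest's own paging state alone. The capability test has_svm_nested_paging() is not shown in this diff; on AMD hardware, nested paging support is reported by CPUID leaf 0x8000000A (EDX bit 0), so a plausible probe, not necessarily the Palacios implementation, looks like:

    #include <stdint.h>

    /* Sketch: query the AMD SVM feature leaf; EDX bit 0 = nested paging (NP). */
    static int has_nested_paging_sketch(void) {
        uint32_t eax, ebx, ecx, edx;

        __asm__ __volatile__ ("cpuid"
                              : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                              : "a"(0x8000000a));
        return (edx & 0x1) ? 1 : 0;
    }
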
diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c
index aaed6c6..931bb99 100644
--- a/palacios/src/palacios/svm_handler.c
+++ b/palacios/src/palacios/svm_handler.c
@@ -32,7 +32,7 @@
 #include
 #include
 #include
-
+#include <palacios/vmm_direct_paging.h>
@@ -247,12 +247,20 @@ int v3_handle_svm_exit(struct guest_info * info) {
 	    }
 	    break;
 	}
-	case VMEXIT_NPF:
+	case VMEXIT_NPF: {
+	    addr_t fault_addr = guest_ctrl->exit_info2;
+	    pf_error_t * error_code = (pf_error_t *)&(guest_ctrl->exit_info1);
 
-	    PrintError("Currently unhandled Nested Page Fault\n");
-	    return -1;
-
+	    if (info->shdw_pg_mode == NESTED_PAGING) {
+		if (v3_handle_nested_pagefault(info, fault_addr, *error_code) == -1) {
+		    return -1;
+		}
+	    } else {
+		PrintError("Currently unhandled Nested Page Fault\n");
+		return -1;
+	    }
 	    break;
+	}
 	case VMEXIT_INVLPG:
 	    if (info->shdw_pg_mode == SHADOW_PAGING) {
 #ifdef DEBUG_SHADOW_PAGING
diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c
index 631fe5c..c0c713e 100644
--- a/palacios/src/palacios/vmm_config.c
+++ b/palacios/src/palacios/vmm_config.c
@@ -88,10 +88,12 @@ int v3_config_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
     v3_init_hypercall_map(info);
 
-
-    if (v3_cpu_type == V3_SVM_REV3_CPU) {
+    if ((v3_cpu_type == V3_SVM_REV3_CPU) &&
+	(config_ptr->enable_nested_paging == 1)) {
+	PrintDebug("Guest Page Mode: NESTED_PAGING\n");
 	info->shdw_pg_mode = NESTED_PAGING;
     } else {
+	PrintDebug("Guest Page Mode: SHADOW_PAGING\n");
 	v3_init_shadow_page_state(info);
 	info->shdw_pg_mode = SHADOW_PAGING;
     }
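
On an SVM nested page fault exit, the hardware reports the fault in the same format as a normal #PF: EXITINFO1 carries a page-fault-style error code and EXITINFO2 carries the faulting guest physical address, which is why the handler above can cast exit_info1 directly to pf_error_t. A sketch of the bit layout that cast assumes (pf_error_t itself is defined in Palacios' paging headers; this mirrors the architectural #PF error code):

    #include <stdint.h>

    /* Assumed layout of the low error-code bits (x86 #PF format). */
    typedef struct pf_error_sketch {
        uint32_t present     : 1;   // 0 = fault on a not-present entry
        uint32_t write       : 1;   // 1 = faulting access was a write
        uint32_t user        : 1;   // 1 = access came from user mode
        uint32_t rsvd_access : 1;   // reserved bit set in a paging entry
        uint32_t ifetch      : 1;   // 1 = instruction fetch
        uint32_t rsvd        : 27;
    } __attribute__((packed)) pf_error_sketch_t;
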
diff --git a/palacios/src/palacios/vmm_direct_paging.c b/palacios/src/palacios/vmm_direct_paging.c
index 783f696..821d7b3 100644
--- a/palacios/src/palacios/vmm_direct_paging.c
+++ b/palacios/src/palacios/vmm_direct_paging.c
@@ -44,7 +44,7 @@ addr_t v3_create_direct_passthrough_pts(struct guest_info * info) {
 
 int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
     v3_vm_cpu_mode_t mode = v3_get_cpu_mode(info);
-  
+
     switch(mode) {
     case REAL:
     case PROTECTED:
@@ -62,3 +62,29 @@ int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
     }
     return -1;
 }
+
+
+
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+    // THIS IS VERY BAD
+    v3_vm_cpu_mode_t mode = LONG;
+
+    switch(mode) {
+    case REAL:
+    case PROTECTED:
+	return handle_passthrough_pagefault_32(info, fault_addr, error_code);
+
+    case PROTECTED_PAE:
+	return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
+
+    case LONG:
+    case LONG_32_COMPAT:
+	return handle_passthrough_pagefault_64(info, fault_addr, error_code);
+
+    default:
+	PrintError("Unknown CPU Mode\n");
+	break;
+    }
+    return -1;
+}
+
diff --git a/palacios/src/palacios/vmm_direct_paging_32.h b/palacios/src/palacios/vmm_direct_paging_32.h
index 4df18a9..6645f86 100644
--- a/palacios/src/palacios/vmm_direct_paging_32.h
+++ b/palacios/src/palacios/vmm_direct_paging_32.h
@@ -33,7 +33,7 @@ static inline int handle_passthrough_pagefault_32(struct guest_info * info,
						   addr_t fault_addr,
						   pf_error_t error_code) {
     // Check to see if pde and pte exist (create them if not)
-    pde32_t * pde = CR3_TO_PDE32_VA(info->ctrl_regs.cr3);
+    pde32_t * pde = NULL;
     pte32_t * pte = NULL;
     addr_t host_addr = 0;
 
@@ -51,6 +51,14 @@ static inline int handle_passthrough_pagefault_32(struct guest_info * info,
 
     host_addr = v3_get_shadow_addr(region, fault_addr);
 
+    // Lookup the correct PDE address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+	pde = CR3_TO_PDE32_VA(info->ctrl_regs.cr3);
+    } else {
+	pde = CR3_TO_PDE32_VA(info->direct_map_pt);
+    }
+
+
     // Fix up the PDE entry
     if (pde[pde_index].present == 0) {
 	pte = (pte32_t *)create_generic_pt_page();
diff --git a/palacios/src/palacios/vmm_direct_paging_32pae.h b/palacios/src/palacios/vmm_direct_paging_32pae.h
index 033c69d..27cc9b3 100644
--- a/palacios/src/palacios/vmm_direct_paging_32pae.h
+++ b/palacios/src/palacios/vmm_direct_paging_32pae.h
@@ -31,7 +31,7 @@ static inline int handle_passthrough_pagefault_32pae(struct guest_info * info,
						      addr_t fault_addr,
						      pf_error_t error_code) {
-    pdpe32pae_t * pdpe = CR3_TO_PDPE32PAE_VA(info->ctrl_regs.cr3);
+    pdpe32pae_t * pdpe = NULL;
     pde32pae_t * pde = NULL;
     pte32pae_t * pte = NULL;
     addr_t host_addr = 0;
@@ -51,6 +51,13 @@ static inline int handle_passthrough_pagefault_32pae(struct guest_info * info,
 
     host_addr = v3_get_shadow_addr(region, fault_addr);
 
+    // Lookup the correct PDPE address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+	pdpe = CR3_TO_PDPE32PAE_VA(info->ctrl_regs.cr3);
+    } else {
+	pdpe = CR3_TO_PDPE32PAE_VA(info->direct_map_pt);
+    }
+
     // Fix up the PDPE entry
     if (pdpe[pdpe_index].present == 0) {
 	pde = (pde32pae_t *)create_generic_pt_page();
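
The hardcoded mode in v3_handle_nested_pagefault() (flagged "THIS IS VERY BAD" above) reflects a real constraint: the nested page tables must match the format that v3_create_direct_passthrough_pts() built them in, which on AMD follows the host's paging configuration rather than the guest's current CPU mode, so deriving the mode from guest state would also be wrong. Hardcoding LONG assumes 4-level 64-bit tables. The PML4E64_INDEX/PDPE64_INDEX/PDE64_INDEX/PTE64_INDEX macros used by the walkers implement the standard x86-64 index split, roughly:

    #include <stdint.h>

    /* Sketch of the standard x86-64 4-level index split (the real macros
     * live in Palacios' paging headers). */
    static inline int pml4e64_index(uint64_t addr) { return (addr >> 39) & 0x1ff; } // bits 47:39
    static inline int pdpe64_index(uint64_t addr)  { return (addr >> 30) & 0x1ff; } // bits 38:30
    static inline int pde64_index(uint64_t addr)   { return (addr >> 21) & 0x1ff; } // bits 29:21
    static inline int pte64_index(uint64_t addr)   { return (addr >> 12) & 0x1ff; } // bits 20:12
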
diff --git a/palacios/src/palacios/vmm_direct_paging_64.h b/palacios/src/palacios/vmm_direct_paging_64.h
index a840c2b..d79693b 100644
--- a/palacios/src/palacios/vmm_direct_paging_64.h
+++ b/palacios/src/palacios/vmm_direct_paging_64.h
@@ -31,99 +31,107 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
						   addr_t fault_addr,
						   pf_error_t error_code) {
-    pml4e64_t * pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
-    pdpe64_t * pdpe = NULL;
-    pde64_t * pde = NULL;
-    pte64_t * pte = NULL;
-    addr_t host_addr = 0;
-
-    int pml_index = PML4E64_INDEX(fault_addr);
-    int pdpe_index = PDPE64_INDEX(fault_addr);
-    int pde_index = PDE64_INDEX(fault_addr);
-    int pte_index = PTE64_INDEX(fault_addr);
-
-    struct v3_shadow_region * region = v3_get_shadow_region(info, fault_addr);
-
-    if ((region == NULL) ||
-	(region->host_type == SHDW_REGION_INVALID)) {
-	PrintError("Invalid region in passthrough page fault 64, addr=%p\n",
-		   (void *)fault_addr);
-	return -1;
-    }
-
-    host_addr = v3_get_shadow_addr(region, fault_addr);
-
-    //Fix up the PML entry
-    if (pml[pml_index].present == 0) {
-	pdpe = (pdpe64_t *)create_generic_pt_page();
-
-	pml[pml_index].present = 1;
-	// Set default PML Flags...
-	pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));
-    } else {
-	pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
-    }
-
-    // Fix up the PDPE entry
-    if (pdpe[pdpe_index].present == 0) {
-	pde = (pde64_t *)create_generic_pt_page();
-
-	pdpe[pdpe_index].present = 1;
-	// Set default PDPE Flags...
-	pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
-    } else {
-	pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
-    }
+    pml4e64_t * pml = NULL;
+    pdpe64_t * pdpe = NULL;
+    pde64_t * pde = NULL;
+    pte64_t * pte = NULL;
+    addr_t host_addr = 0;
 
+    int pml_index = PML4E64_INDEX(fault_addr);
+    int pdpe_index = PDPE64_INDEX(fault_addr);
+    int pde_index = PDE64_INDEX(fault_addr);
+    int pte_index = PTE64_INDEX(fault_addr);
 
-    // Fix up the PDE entry
-    if (pde[pde_index].present == 0) {
-	pte = (pte64_t *)create_generic_pt_page();
-	pde[pde_index].present = 1;
-	pde[pde_index].writable = 1;
-	pde[pde_index].user_page = 1;
-
-	pde[pde_index].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
-    } else {
-	pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
-    }
+    struct v3_shadow_region * region = v3_get_shadow_region(info, fault_addr);
+
+    if ((region == NULL) ||
+	(region->host_type == SHDW_REGION_INVALID)) {
+	PrintError("Invalid region in passthrough page fault 64, addr=%p\n",
+		   (void *)fault_addr);
+	return -1;
+    }
+
+    host_addr = v3_get_shadow_addr(region, fault_addr);
 
-    // Fix up the PTE entry
-    if (pte[pte_index].present == 0) {
-	pte[pte_index].user_page = 1;
+    // Lookup the correct PML address based on the PAGING MODE
+    if (info->shdw_pg_mode == SHADOW_PAGING) {
+	pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
+    } else {
+	pml = CR3_TO_PML4E64_VA(info->direct_map_pt);
+    }
 
-	if (region->host_type == SHDW_REGION_ALLOCATED) {
-	    // Full access
-	    pte[pte_index].present = 1;
-	    pte[pte_index].writable = 1;
+    //Fix up the PML entry
+    if (pml[pml_index].present == 0) {
+	pdpe = (pdpe64_t *)create_generic_pt_page();
+
+	pml[pml_index].present = 1;
+	// Set default PML Flags...
+	pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));
+    } else {
+	pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
+    }
 
-	    pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+    // Fix up the PDPE entry
+    if (pdpe[pdpe_index].present == 0) {
+	pde = (pde64_t *)create_generic_pt_page();
+
+	pdpe[pdpe_index].present = 1;
+	// Set default PDPE Flags...
+	pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
+    } else {
+	pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
+    }
 
-	} else if (region->host_type == SHDW_REGION_WRITE_HOOK) {
-	    // Only trap writes
-	    pte[pte_index].present = 1;
-	    pte[pte_index].writable = 0;
-	    pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+    // Fix up the PDE entry
+    if (pde[pde_index].present == 0) {
+	pte = (pte64_t *)create_generic_pt_page();
+
+	pde[pde_index].present = 1;
+	pde[pde_index].writable = 1;
+	pde[pde_index].user_page = 1;
+
+	pde[pde_index].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
+    } else {
+	pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
+    }
 
-	} else if (region->host_type == SHDW_REGION_FULL_HOOK) {
-	    // trap all accesses
-	    return v3_handle_mem_full_hook(info, fault_addr, fault_addr, region, error_code);
-	} else {
-	    PrintError("Unknown Region Type...\n");
-	    return -1;
+    // Fix up the PTE entry
+    if (pte[pte_index].present == 0) {
+	pte[pte_index].user_page = 1;
+
+	if (region->host_type == SHDW_REGION_ALLOCATED) {
+	    // Full access
+	    pte[pte_index].present = 1;
+	    pte[pte_index].writable = 1;
+
+	    pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+
+	} else if (region->host_type == SHDW_REGION_WRITE_HOOK) {
+	    // Only trap writes
+	    pte[pte_index].present = 1;
+	    pte[pte_index].writable = 0;
+
+	    pte[pte_index].page_base_addr = PAGE_BASE_ADDR(host_addr);
+
+	} else if (region->host_type == SHDW_REGION_FULL_HOOK) {
+	    // trap all accesses
+	    return v3_handle_mem_full_hook(info, fault_addr, fault_addr, region, error_code);
+
+	} else {
+	    PrintError("Unknown Region Type...\n");
+	    return -1;
+	}
     }
-  }
 
-    if ( (region->host_type == SHDW_REGION_WRITE_HOOK) &&
-	 (error_code.write == 1) ) {
-	return v3_handle_mem_wr_hook(info, fault_addr, fault_addr, region, error_code);
-    }
+    if ( (region->host_type == SHDW_REGION_WRITE_HOOK) &&
+	 (error_code.write == 1) ) {
+	return v3_handle_mem_wr_hook(info, fault_addr, fault_addr, region, error_code);
+    }
 
-    return 0;
+    return 0;
 }
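
Across all three walkers the per-region policy at the leaf entry is the same; a condensed paraphrase of that policy (illustrative wrapper function, not a drop-in; the types and hook handlers are the ones this patch uses):

    /* Sketch: how leaf permissions follow the region type in the walkers above. */
    static int apply_region_policy_sketch(struct guest_info * info, addr_t fault_addr,
                                          pf_error_t error_code,
                                          struct v3_shadow_region * region,
                                          pte64_t * pte, addr_t host_addr) {
        if (pte->present == 0) {
            pte->user_page = 1;

            if (region->host_type == SHDW_REGION_ALLOCATED) {
                // Plain RAM: map with full access, no further exits for this page
                pte->present = 1;
                pte->writable = 1;
                pte->page_base_addr = PAGE_BASE_ADDR(host_addr);
            } else if (region->host_type == SHDW_REGION_WRITE_HOOK) {
                // Map read-only: reads run at full speed, writes keep exiting
                pte->present = 1;
                pte->writable = 0;
                pte->page_base_addr = PAGE_BASE_ADDR(host_addr);
            } else if (region->host_type == SHDW_REGION_FULL_HOOK) {
                // Never mapped: every access is emulated through the hook
                return v3_handle_mem_full_hook(info, fault_addr, fault_addr, region, error_code);
            } else {
                return -1;    // SHDW_REGION_INVALID or unknown
            }
        }

        // A faulting write to a write-hooked page is forwarded to its handler
        if ((region->host_type == SHDW_REGION_WRITE_HOOK) && (error_code.write == 1)) {
            return v3_handle_mem_wr_hook(info, fault_addr, fault_addr, region, error_code);
        }

        return 0;
    }
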