From: Jack Lange Date: Wed, 29 Oct 2008 21:48:02 +0000 (-0500) Subject: setting up framework for 64 bit shadow paging X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=9b31f917eae9b397cb21ff78d81084301b289e43 setting up framework for 64 bit shadow paging streamlined geekos build --- diff --git a/build/Makefile b/build/Makefile index e4ee438..9083d93 100644 --- a/build/Makefile +++ b/build/Makefile @@ -217,7 +217,10 @@ kitten-full: palacios-full64 (cd $(KITTEN_TOP_DIR) && make) -geekos: +geekos: palacios32 + cp $(PALACIOS_BUILD_DIR)/libv3vee.a $(GEEKOS_BUILD_DIR)/palacios/ + cp $(PALACIOS_BUILD_DIR)/../lib/xed/libxed.a $(GEEKOS_BUILD_DIR)/palacios/ + cp $(PALACIOS_BUILD_DIR)/vm_kernel $(GEEKOS_BUILD_DIR)/palacios/ (cd $(GEEKOS_BUILD_DIR) && make) diff --git a/palacios/build/Makefile b/palacios/build/Makefile index 2ea49e2..8f2da64 100644 --- a/palacios/build/Makefile +++ b/palacios/build/Makefile @@ -62,9 +62,11 @@ endif ifeq ($(DEBUG_ALL),1) - DEBUG_SECTIONS:= $(DEBUG_SECTIONS) -DDEBUG_SHADOW_PAGING -DDEBUG_CTRL_REGS -DDEBUG_INTERRUPTS -DDEBUG_IO -DDEBUG_KEYBOARD -DDEBUG_PIC -DDEBUG_PIT -DDEBUG_NVRAM -DDEBUG_EMULATOR -DDEBUG_GENERIC -DDEBUG_RAMDISK -DDEBUG_XED -DDEBUG_HALT -DDEBUG_DEV_MGR + DEBUG_SECTIONS:= $(DEBUG_SECTIONS) -DDEBUG_SHADOW_PAGING -DDEBUG_CTRL_REGS -DDEBUG_INTERRUPTS -DDEBUG_KEYBOARD -DDEBUG_PIC -DDEBUG_PIT -DDEBUG_NVRAM -DDEBUG_EMULATOR -DDEBUG_XED -DDEBUG_HALT -DDEBUG_DEV_MGR +# -DDEBUG_IO -DDEBUG_GENERIC -DDEBUG_RAMDISK endif + ifeq ($(DEBUG_SHADOW_PAGING),1) DEBUG_SECTIONS := $(DEBUG_SECTIONS) -DDEBUG_SHADOW_PAGING else diff --git a/palacios/include/palacios/vmm_shadow_paging.h b/palacios/include/palacios/vmm_shadow_paging.h index b2b3d1e..1245934 100644 --- a/palacios/include/palacios/vmm_shadow_paging.h +++ b/palacios/include/palacios/vmm_shadow_paging.h @@ -64,7 +64,7 @@ int v3_handle_shadow_invlpg(struct guest_info * info); int v3_activate_shadow_pt(struct guest_info * info); - +int v3_activate_passthrough_pt(struct guest_info * info); /* TODO: Change to static functions * External visibility not needed diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index 6383a26..8efa804 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -73,7 +73,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { //ctrl_area->instrs.instrs.CR0 = 1; ctrl_area->cr_reads.cr0 = 1; ctrl_area->cr_writes.cr0 = 1; - ctrl_area->cr_reads.cr4 = 1; + //ctrl_area->cr_reads.cr4 = 1; ctrl_area->cr_writes.cr4 = 1; diff --git a/palacios/src/palacios/vmm_ctrl_regs.c b/palacios/src/palacios/vmm_ctrl_regs.c index 0fb2e2c..4cb40d8 100644 --- a/palacios/src/palacios/vmm_ctrl_regs.c +++ b/palacios/src/palacios/vmm_ctrl_regs.c @@ -69,13 +69,7 @@ int v3_handle_cr0_write(struct guest_info * info) { ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); } - /* The IFetch will already have faulted in the necessary bytes for the full instruction - if (ret != 15) { - // I think we should inject a GPF into the guest - PrintError("Could not read instruction (ret=%d)\n", ret); - return -1; - } - */ + if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { PrintError("Could not decode instruction\n"); @@ -171,11 +165,21 @@ static int handle_mov_to_cr0_32(struct guest_info * info, struct x86_instr * dec if (v3_get_mem_mode(info) == VIRTUAL_MEM) { - struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3); - PrintDebug("Setting up Guest Page Table\n"); - info->ctrl_regs.cr3 = *(addr_t*)guest_cr3; + /* struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3); + info->ctrl_regs.cr3 = *(addr_t*)guest_cr3; + */ + PrintDebug("Activating Shadow Page Tables\n"); + + if (v3_activate_shadow_pt(info) == -1) { + PrintError("Failed to activate shadow page tables\n"); + return -1; + } } else { - info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt); + + if (v3_activate_passthrough_pt(info) == -1) { + PrintError("Failed to activate passthrough page tables\n"); + return -1; + } shadow_cr0->pg = 1; } @@ -251,13 +255,6 @@ int v3_handle_cr0_read(struct guest_info * info) { ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); } - /* The IFetch will already have faulted in the necessary bytes for the full instruction - if (ret != 15) { - // I think we should inject a GPF into the guest - PrintError("Could not read instruction (ret=%d)\n", ret); - return -1; - } - */ if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { PrintError("Could not decode instruction\n"); @@ -345,18 +342,22 @@ int v3_handle_cr3_write(struct guest_info * info) { if (handle_mov_to_cr3_32(info, &dec_instr) == -1) { return -1; } + break; case PROTECTED_PAE: if (handle_mov_to_cr3_32pae(info, &dec_instr) == -1) { return -1; } + break; case LONG: if (handle_mov_to_cr3_64(info, &dec_instr) == -1) { return -1; } + break; case LONG_32_COMPAT: if (handle_mov_to_cr3_64compat(info, &dec_instr) == -1) { return -1; } + break; default: PrintError("Unhandled CPU mode: %d\n", info->cpu_mode); return -1; @@ -384,9 +385,10 @@ static int handle_mov_to_cr3_32(struct guest_info * info, struct x86_instr * dec if (info->shdw_pg_mode == SHADOW_PAGING) { struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr->src_operand.operand); struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3); - // struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3); +#ifdef DEBUG_CTRL_REGS + struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3); +#endif - PrintDebug("Old Shadow CR3=%x; Old Guest CR3=%x\n", *(uint_t*)shadow_cr3, *(uint_t*)guest_cr3); @@ -436,13 +438,7 @@ int v3_handle_cr3_read(struct guest_info * info) { ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); } - /* The IFetch will already have faulted in the necessary bytes for the full instruction - if (ret != 15) { - // I think we should inject a GPF into the guest - PrintError("Could not read instruction (ret=%d)\n", ret); - return -1; - } - */ + if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { PrintError("Could not decode instruction\n"); @@ -470,9 +466,12 @@ int v3_handle_cr3_read(struct guest_info * info) { return 0; } + +// We don't need to virtualize CR4, all we need is to detect the activation of PAE int v3_handle_cr4_read(struct guest_info * info) { - PrintError("CR4 Read not handled\n"); - return -1; + // PrintError("CR4 Read not handled\n"); + // Do nothing... + return 0; } int v3_handle_cr4_write(struct guest_info * info) { diff --git a/palacios/src/palacios/vmm_shadow_paging.c b/palacios/src/palacios/vmm_shadow_paging.c index 529ca17..63d0e92 100644 --- a/palacios/src/palacios/vmm_shadow_paging.c +++ b/palacios/src/palacios/vmm_shadow_paging.c @@ -74,13 +74,9 @@ static int activate_shadow_pt_32pae(struct guest_info * info); static int activate_shadow_pt_64(struct guest_info * info); -static int handle_shadow_pte32_fault(struct guest_info* info, - addr_t fault_addr, - pf_error_t error_code, - pte32_t * shadow_pte, - pte32_t * guest_pte); - -static int handle_shadow_pagefault32(struct guest_info * info, addr_t fault_addr, pf_error_t error_code); +static int handle_shadow_pagefault_32(struct guest_info * info, addr_t fault_addr, pf_error_t error_code); +static int handle_shadow_pagefault_32pae(struct guest_info * info, addr_t fault_addr, pf_error_t error_code); +static int handle_shadow_pagefault_64(struct guest_info * info, addr_t fault_addr, pf_error_t error_code); int v3_init_shadow_page_state(struct guest_info * info) { struct shadow_page_state * state = &(info->shdw_pg_state); @@ -235,6 +231,8 @@ int v3_replace_shdw_page32(struct guest_info * info, addr_t location, pte32_t * +// We assume that shdw_pg_state.guest_cr3 is pointing to the page tables we want to activate +// We also assume that the CPU mode has not changed during this page table transition static int activate_shadow_pt_32(struct guest_info * info) { struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3); struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3); @@ -300,6 +298,18 @@ int v3_activate_shadow_pt(struct guest_info * info) { } +int v3_activate_passthrough_pt(struct guest_info * info) { + // For now... But we need to change this.... + // As soon as shadow paging becomes active the passthrough tables are hosed + // So this will cause chaos if it is called at that time + + info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt); + //PrintError("Activate Passthrough Page tables not implemented\n"); + return 0; +} + + + int v3_handle_shadow_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { if (info->mem_mode == PHYSICAL_MEM) { @@ -314,10 +324,13 @@ int v3_handle_shadow_pagefault(struct guest_info * info, addr_t fault_addr, pf_e switch (info->cpu_mode) { case PROTECTED: - return handle_shadow_pagefault32(info, fault_addr, error_code); + return handle_shadow_pagefault_32(info, fault_addr, error_code); break; case PROTECTED_PAE: + return handle_shadow_pagefault_32pae(info, fault_addr, error_code); case LONG: + return handle_shadow_pagefault_64(info, fault_addr, error_code); + break; default: PrintError("Unhandled CPU Mode\n"); return -1; @@ -378,95 +391,58 @@ static int is_guest_pf(pt_access_status_t guest_access, pt_access_status_t shado -/* The guest status checks have already been done, - * only special case shadow checks remain +/* + * * + * * + * * 64 bit Page table fault handlers + * * + * * */ -static int handle_large_pagefault32(struct guest_info * info, - addr_t fault_addr, pf_error_t error_code, - pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde) -{ - pt_access_status_t shadow_pte_access = can_access_pte32(shadow_pt, fault_addr, error_code); - pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]); - - if (shadow_pte_access == PT_ACCESS_OK) { - // Inconsistent state... - // Guest Re-Entry will flush tables and everything should now workd - PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n"); - return 0; - } - - if (shadow_pte_access == PT_ENTRY_NOT_PRESENT) { - // Get the guest physical address of the fault - addr_t guest_fault_pa = PDE32_4MB_T_ADDR(*large_guest_pde) + PD32_4MB_PAGE_OFFSET(fault_addr); - host_region_type_t host_page_type = get_shadow_addr_type(info, guest_fault_pa); - +static int handle_shadow_pagefault_64(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { + PrintError("64 bit shadow paging not implemented\n"); + return -1; +} - if (host_page_type == HOST_REGION_INVALID) { - // Inject a machine check in the guest - PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa); - v3_raise_exception(info, MC_EXCEPTION); - return 0; - } - if (host_page_type == HOST_REGION_PHYSICAL_MEMORY) { - struct shadow_page_state * state = &(info->shdw_pg_state); - addr_t shadow_pa = get_shadow_addr(info, guest_fault_pa); +/* + * * + * * + * * 32 bit PAE Page table fault handlers + * * + * * + */ - shadow_pte->page_base_addr = PT32_BASE_ADDR(shadow_pa); +static int handle_shadow_pagefault_32pae(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { + PrintError("32 bit PAE shadow paging not implemented\n"); + return -1; +} - shadow_pte->present = 1; - /* We are assuming that the PDE entry has precedence - * so the Shadow PDE will mirror the guest PDE settings, - * and we don't have to worry about them here - * Allow everything - */ - shadow_pte->user_page = 1; - if (find_pte_map(state->cached_ptes, PT32_PAGE_ADDR(guest_fault_pa)) != NULL) { - // Check if the entry is a page table... - PrintDebug("Marking page as Guest Page Table (large page)\n"); - shadow_pte->vmm_info = PT32_GUEST_PT; - shadow_pte->writable = 0; - } else { - shadow_pte->writable = 1; - } - //set according to VMM policy - shadow_pte->write_through = 0; - shadow_pte->cache_disable = 0; - shadow_pte->global_page = 0; - // - - } else { - // Handle hooked pages as well as other special pages - if (handle_special_page_fault(info, fault_addr, guest_fault_pa, error_code) == -1) { - PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr); - return -1; - } - } - } else if ((shadow_pte_access == PT_WRITE_ERROR) && - (shadow_pte->vmm_info == PT32_GUEST_PT)) { - struct shadow_page_state * state = &(info->shdw_pg_state); - PrintDebug("Write operation on Guest PAge Table Page (large page)\n"); - state->cached_cr3 = 0; - shadow_pte->writable = 1; - } else { - PrintError("Error in large page fault handler...\n"); - PrintError("This case should have been handled at the top level handler\n"); - return -1; - } +/* + * * + * * + * * 32 bit Page table fault handlers + * * + * * + */ +static int handle_large_pagefault_32(struct guest_info * info, + addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde); - PrintDebug("Returning from large page fault handler\n"); - return 0; -} +static int handle_shadow_pte32_fault(struct guest_info * info, + addr_t fault_addr, + pf_error_t error_code, + pte32_t * shadow_pt, + pte32_t * guest_pt); -static int handle_shadow_pagefault32(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { +static int handle_shadow_pagefault_32(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { pde32_t * guest_pd = NULL; pde32_t * shadow_pd = CR3_TO_PDE32_VA(info->ctrl_regs.cr3); addr_t guest_cr3 = CR3_TO_PDE32_PA(info->shdw_pg_state.guest_cr3); @@ -549,7 +525,7 @@ static int handle_shadow_pagefault32(struct guest_info * info, addr_t fault_addr return -1; } } else if (guest_pde->large_page == 1) { - if (handle_large_pagefault32(info, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde) == -1) { + if (handle_large_pagefault_32(info, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde) == -1) { PrintError("Error handling large pagefault\n"); return -1; } @@ -602,6 +578,96 @@ static int handle_shadow_pagefault32(struct guest_info * info, addr_t fault_addr +/* The guest status checks have already been done, + * only special case shadow checks remain + */ +static int handle_large_pagefault_32(struct guest_info * info, + addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde) +{ + pt_access_status_t shadow_pte_access = can_access_pte32(shadow_pt, fault_addr, error_code); + pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]); + + if (shadow_pte_access == PT_ACCESS_OK) { + // Inconsistent state... + // Guest Re-Entry will flush tables and everything should now workd + PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n"); + return 0; + } + + + if (shadow_pte_access == PT_ENTRY_NOT_PRESENT) { + // Get the guest physical address of the fault + addr_t guest_fault_pa = PDE32_4MB_T_ADDR(*large_guest_pde) + PD32_4MB_PAGE_OFFSET(fault_addr); + host_region_type_t host_page_type = get_shadow_addr_type(info, guest_fault_pa); + + + if (host_page_type == HOST_REGION_INVALID) { + // Inject a machine check in the guest + PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa); + v3_raise_exception(info, MC_EXCEPTION); + return 0; + } + + if (host_page_type == HOST_REGION_PHYSICAL_MEMORY) { + struct shadow_page_state * state = &(info->shdw_pg_state); + addr_t shadow_pa = get_shadow_addr(info, guest_fault_pa); + + shadow_pte->page_base_addr = PT32_BASE_ADDR(shadow_pa); + + shadow_pte->present = 1; + + /* We are assuming that the PDE entry has precedence + * so the Shadow PDE will mirror the guest PDE settings, + * and we don't have to worry about them here + * Allow everything + */ + shadow_pte->user_page = 1; + + if (find_pte_map(state->cached_ptes, PT32_PAGE_ADDR(guest_fault_pa)) != NULL) { + // Check if the entry is a page table... + PrintDebug("Marking page as Guest Page Table (large page)\n"); + shadow_pte->vmm_info = PT32_GUEST_PT; + shadow_pte->writable = 0; + } else { + shadow_pte->writable = 1; + } + + + //set according to VMM policy + shadow_pte->write_through = 0; + shadow_pte->cache_disable = 0; + shadow_pte->global_page = 0; + // + + } else { + // Handle hooked pages as well as other special pages + if (handle_special_page_fault(info, fault_addr, guest_fault_pa, error_code) == -1) { + PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr); + return -1; + } + } + } else if ((shadow_pte_access == PT_WRITE_ERROR) && + (shadow_pte->vmm_info == PT32_GUEST_PT)) { + + struct shadow_page_state * state = &(info->shdw_pg_state); + PrintDebug("Write operation on Guest PAge Table Page (large page)\n"); + state->cached_cr3 = 0; + shadow_pte->writable = 1; + + } else { + PrintError("Error in large page fault handler...\n"); + PrintError("This case should have been handled at the top level handler\n"); + return -1; + } + + PrintDebug("Returning from large page fault handler\n"); + return 0; +} + + + + /* * We assume the the guest pte pointer has already been translated to a host virtual address */ @@ -652,6 +718,7 @@ static int handle_shadow_pte32_fault(struct guest_info * info, addr_t guest_pa = PTE32_T_ADDR((*guest_pte)) + PT32_PAGE_OFFSET(fault_addr); // Page Table Entry Not Present + PrintDebug("guest_pa =%p\n", (void *)guest_pa); host_region_type_t host_page_type = get_shadow_addr_type(info, guest_pa);