From: Patrick Bridges
Date: Wed, 11 Aug 2010 17:21:48 +0000 (-0600)
Subject: Changes to support large shadow pages *correctly*.
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=8684ef59c3ede8fe0c33b2f04dbe30a287e7b353;p=palacios.git

Changes to support large shadow pages *correctly*.
---

diff --git a/palacios/include/palacios/vmm_mem.h b/palacios/include/palacios/vmm_mem.h
index a8e776a..651e82e 100644
--- a/palacios/include/palacios/vmm_mem.h
+++ b/palacios/include/palacios/vmm_mem.h
@@ -109,7 +109,7 @@ struct v3_mem_region * v3_get_next_mem_region(struct v3_vm_info * vm, uint16_t c
 
 void v3_print_mem_map(struct v3_vm_info * vm);
 
-
+uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, uint32_t req_size);
 
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_tlb_64.h b/palacios/src/palacios/mmu/vmm_shdw_pg_tlb_64.h
index db59d7d..aff8034 100644
--- a/palacios/src/palacios/mmu/vmm_shdw_pg_tlb_64.h
+++ b/palacios/src/palacios/mmu/vmm_shdw_pg_tlb_64.h
@@ -332,23 +332,32 @@ static int handle_pde_shadow_pagefault_64(struct guest_info * info, addr_t fault
 	return 0;
     }
 
-    // Handle as a shadow large page if possible
-    if (guest_pde->large_page
-	&& (info->vm_info->mem_align >= PAGE_SIZE_2MB)) {
-	if (handle_2MB_shadow_pagefault_pde_64(info, fault_addr, error_code, shadow_pde_access,
-					       (pde64_2MB_t *)shadow_pde, (pde64_2MB_t *)guest_pde) == -1) {
-	    PrintError("Error handling large pagefault with large page\n");
-	    return -1;
-	} else {
-	    return 0;
-	}
-    }
-
     pte64_t * shadow_pt = NULL;
     pte64_t * guest_pt = NULL;
 
     // get the next shadow page level, allocate if not present
     if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {
+	// Check if we can use large pages and the guest memory is properly aligned
+	// to potentially use a large page
+	if (info->use_large_pages && guest_pde->large_page
+	    && (info->vm_info->mem_align >= PAGE_SIZE_2MB)) {
+	    // Check underlying physical memory map to see if a large page is viable
+	    addr_t guest_pa = BASE_TO_PAGE_ADDR_2MB(((pde64_2MB_t *)guest_pde)->page_base_addr);
+	    uint32_t max_size = v3_get_max_page_size(info, guest_pa, PAGE_SIZE_2MB);
+
+	    if (max_size >= PAGE_SIZE_2MB) {
+		if (handle_2MB_shadow_pagefault_pde_64(info, fault_addr, error_code, shadow_pde_access,
+						       (pde64_2MB_t *)shadow_pde, (pde64_2MB_t *)guest_pde) == 0) {
+		    return 0;
+		} else {
+		    PrintError("Error handling large pagefault with large page\n");
+		    return -1;
+		}
+	    } else {
+		PrintDebug("Underlying physical memory map doesn't allow use of a large page.\n");
+	    }
+	    // Fallthrough to handle the region with small pages
+	}
+
 	struct shadow_page_data * shdw_page = create_new_shadow_pt(info);
 	shadow_pt = (pte64_t *)V3_VAddr((void *)shdw_page->page_pa);
 
diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c
index 22ab611..7a596d2 100644
--- a/palacios/src/palacios/vmm_config.c
+++ b/palacios/src/palacios/vmm_config.c
@@ -301,6 +301,7 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
 	info->shdw_pg_mode = SHADOW_PAGING;
     }
 
+
     if (info->shdw_pg_mode == NESTED_PAGING) {
 	PrintDebug("Guest Paging Mode: NESTED_PAGING\n");
     } else if (info->shdw_pg_mode == SHADOW_PAGING) {
@@ -309,6 +310,12 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
 	PrintError("Guest paging mode incorrectly set.\n");
 	return -1;
     }
+
+    if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
+	info->use_large_pages = 1;
+	PrintDebug("Use of large pages in memory virtualization enabled.\n");
+    }
+
     return 0;
 }
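Taken together, the shadow-paging hunks above gate the 2MB fast path on four conditions: the new `large_pages` configuration flag, a guest PDE that actually maps a large page, guest RAM aligned to at least a 2MB boundary, and a physical memory map in which no hooked region overlaps the 2MB frame. The following is a minimal standalone sketch of that decision, not Palacios code: `struct core_ctx`, `get_max_page_size_stub()`, and `can_use_large_shadow_page()` are simplified, hypothetical stand-ins for the real definitions.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_2MB (2U * 1024 * 1024)

/* Hypothetical, pared-down stand-in for the fields the real handler reads. */
struct core_ctx {
    int      use_large_pages; /* set when the "large_pages" option is "true" */
    uint32_t mem_align;       /* guest RAM alignment (vm_info->mem_align)    */
};

/* Stand-in for v3_get_max_page_size(); the real one walks the memory map. */
static uint32_t get_max_page_size_stub(uintptr_t guest_pa, uint32_t req_size) {
    (void)guest_pa;
    return req_size; /* pretend no hooked region overlaps the frame */
}

/* Returns 1 when a fault may be satisfied with a 2MB shadow mapping,
 * 0 when the handler must fall through to 4KB pages. */
static int can_use_large_shadow_page(struct core_ctx * core,
                                     int guest_pde_is_large,
                                     uintptr_t guest_pa) {
    if (!core->use_large_pages || !guest_pde_is_large) {
        return 0; /* feature disabled, or the guest mapped small pages */
    }
    if (core->mem_align < PAGE_SIZE_2MB) {
        return 0; /* guest RAM not 2MB-aligned in host memory */
    }
    /* Last, ask the memory map whether the whole 2MB frame is safe. */
    return get_max_page_size_stub(guest_pa, PAGE_SIZE_2MB) >= PAGE_SIZE_2MB;
}

int main(void) {
    struct core_ctx core = { 1, PAGE_SIZE_2MB };
    printf("large page ok: %d\n",
           can_use_large_shadow_page(&core, 1, 0x40200000UL));
    return 0;
}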
diff --git a/palacios/src/palacios/vmm_direct_paging_64.h b/palacios/src/palacios/vmm_direct_paging_64.h
index d45ae17..97324d4 100644
--- a/palacios/src/palacios/vmm_direct_paging_64.h
+++ b/palacios/src/palacios/vmm_direct_paging_64.h
@@ -29,75 +29,6 @@
 
 // Reference: AMD Software Developer Manual Vol.2 Ch.5 "Page Translation and Protection"
 
-static uint32_t get_page_size(struct guest_info * core, addr_t fault_addr) {
-    addr_t pg_start = 0UL, pg_end = 0UL; // 2MiB page containing the faulting address
-    struct v3_mem_region * pg_next_reg = NULL; // next immediate mem reg after page start addr
-    uint32_t page_size = PAGE_SIZE_4KB;
-
-    /* If the guest has been configured for 2MiB pages, then we must check for hooked regions of
-     * memory which may overlap with the 2MiB page containing the faulting address (due to
-     * potentially differing access policies in place for e.g. i/o devices and APIC). A 2MiB page
-     * can be used if a) no region overlaps the page [or b) a region does overlap but fully contains
-     * the page]. The [bracketed] text pertains to the #if 0'd code below, state D. TODO modify this
-     * note if someone decides to enable this optimization. It can be tested with the SeaStar
-     * mapping.
-     *
-     * Examples: (CAPS regions are returned by v3_get_next_mem_region; state A returns the base reg)
-     *
-     *   |region|        |region|                       2MiB mapped (state A)
-     *                 |reg|          |REG|             2MiB mapped (state B)
-     * |region|  |reg|      |REG|  |region|  |reg|      4KiB mapped (state C)
-     *     |reg|  |reg|        |--REGION---|           [2MiB mapped (state D)]
-     * |--------------------------------------------|   RAM
-     *                            ^                     fault addr
-     * |----|----|----|----|----|page|----|----|----|   2MB pages
-     *                           >>>>>>>>>>>>>>>>>>>>   search space
-     */
-
-
-    // guest page maps to a host page + offset (so when we shift, it aligns with a host page)
-    pg_start = PAGE_ADDR_2MB(fault_addr);
-    pg_end = (pg_start + PAGE_SIZE_2MB);
-
-    PrintDebug("%s: page [%p,%p) contains address\n", __FUNCTION__, (void *)pg_start, (void *)pg_end);
-
-    pg_next_reg = v3_get_next_mem_region(core->vm_info, core->cpu_id, pg_start);
-
-    if (pg_next_reg == NULL) {
-	PrintError("%s: Error: address not in base region, %p\n", __FUNCTION__, (void *)fault_addr);
-	return PAGE_SIZE_4KB;
-    }
-
-    if (pg_next_reg->flags.base == 1) {
-	page_size = PAGE_SIZE_2MB; // State A
-    } else {
-#if 0 // State B/C and D optimization
-	if ((pg_next_reg->guest_end >= pg_end) &&
-	    ((pg_next_reg->guest_start >= pg_end) || (pg_next_reg->guest_start <= pg_start))) {
-	    page_size = PAGE_SIZE_2MB;
-	}
-
-	PrintDebug("%s: region [%p,%p) %s partially overlap with page\n", __FUNCTION__,
-		   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
-		   (page_size == PAGE_SIZE_2MB) ? "does not" : "does");
-
-#else // State B/C
-	if (pg_next_reg->guest_start >= pg_end) {
-
-	    page_size = PAGE_SIZE_2MB;
-	}
-
-	PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__,
-		   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
-		   (page_size == PAGE_SIZE_2MB) ? "does not" : "does");
-
-#endif
-    }
-
-    return page_size;
-}
-
-
 static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
     pml4e64_t * pml = NULL;
     pdpe64_t * pdpe = NULL;
@@ -124,7 +55,7 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr
      * 2. the memory regions can be referenced by a large page */
     if ((core->use_large_pages == 1)) {
-	page_size = get_page_size(core, fault_addr);
+	page_size = v3_get_max_page_size(core, fault_addr, PAGE_SIZE_2MB);
     }
 
     PrintDebug("Using page size of %dKB\n", page_size / 1024);
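The 2MiB alignment arithmetic that both the removed `get_page_size()` and the new `v3_get_max_page_size()` rely on is easy to verify in isolation. The sketch below recomputes the page bounds for an arbitrary fault address; `page_addr_2mb()` is a local, assumed stand-in for the Palacios `PAGE_ADDR_2MB()` macro.

#include <stdint.h>
#include <stdio.h>

typedef uintptr_t addr_t;

#define PAGE_SIZE_2MB (2UL * 1024 * 1024)

/* Local stand-in for PAGE_ADDR_2MB(): mask off the low 21 bits to find
 * the start of the 2MiB page containing the address. */
static addr_t page_addr_2mb(addr_t addr) {
    return addr & ~(PAGE_SIZE_2MB - 1);
}

int main(void) {
    addr_t fault_addr = 0x40321000UL;            /* arbitrary example */
    addr_t pg_start = page_addr_2mb(fault_addr); /* 0x40200000 */
    addr_t pg_end   = pg_start + PAGE_SIZE_2MB;  /* 0x40400000 */

    /* Mirrors the handler's "page [start,end) contains address" debug line. */
    printf("page [%p,%p) contains %p\n",
           (void *)pg_start, (void *)pg_end, (void *)fault_addr);
    return 0;
}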
diff --git a/palacios/src/palacios/vmm_mem.c b/palacios/src/palacios/vmm_mem.c
index c45f18d..78ee376 100644
--- a/palacios/src/palacios/vmm_mem.c
+++ b/palacios/src/palacios/vmm_mem.c
@@ -385,6 +385,93 @@ void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
 }
 
 
+// Determine if a given address can be handled by a large page of the requested size
+uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, uint32_t req_size) {
+    addr_t pg_start = 0UL, pg_end = 0UL; // large page containing the faulting address
+    struct v3_mem_region * pg_next_reg = NULL; // next immediate mem reg after page start addr
+    uint32_t page_size = PAGE_SIZE_4KB;
+
+    /* If the guest has been configured for large pages, then we must check for hooked regions of
+     * memory which may overlap with the large page containing the faulting address (due to
+     * potentially differing access policies in place for e.g. i/o devices and APIC). A large page
+     * can be used if a) no region overlaps the page [or b) a region does overlap but fully contains
+     * the page]. The [bracketed] text pertains to the #if 0'd code below, state D. TODO modify this
+     * note if someone decides to enable this optimization. It can be tested with the SeaStar
+     * mapping.
+     *
+     * Examples: (CAPS regions are returned by v3_get_next_mem_region; state A returns the base reg)
+     *
+     *   |region|        |region|                       2MiB mapped (state A)
+     *                 |reg|          |REG|             2MiB mapped (state B)
+     * |region|  |reg|      |REG|  |region|  |reg|      4KiB mapped (state C)
+     *     |reg|  |reg|        |--REGION---|           [2MiB mapped (state D)]
+     * |--------------------------------------------|   RAM
+     *                            ^                     fault addr
+     * |----|----|----|----|----|page|----|----|----|   2MB pages
+     *                           >>>>>>>>>>>>>>>>>>>>   search space
+     */
+
+
+    // guest page maps to a host page + offset (so when we shift, it aligns with a host page)
+    switch (req_size) {
+	case PAGE_SIZE_4KB:
+	    return PAGE_SIZE_4KB;
+	case PAGE_SIZE_2MB:
+	    pg_start = PAGE_ADDR_2MB(fault_addr);
+	    pg_end = (pg_start + PAGE_SIZE_2MB);
+	    break;
+	case PAGE_SIZE_4MB:
+	    pg_start = PAGE_ADDR_4MB(fault_addr);
+	    pg_end = (pg_start + PAGE_SIZE_4MB);
+	    break;
+	case PAGE_SIZE_1GB:
+	    pg_start = PAGE_ADDR_1GB(fault_addr);
+	    pg_end = (pg_start + PAGE_SIZE_1GB);
+	    break;
+	default:
+	    PrintError("Invalid large page size requested.\n");
+	    return -1;
+    }
+
+    PrintDebug("%s: page [%p,%p) contains address\n", __FUNCTION__, (void *)pg_start, (void *)pg_end);
+
+    pg_next_reg = v3_get_next_mem_region(core->vm_info, core->cpu_id, pg_start);
+
+    if (pg_next_reg == NULL) {
+	PrintError("%s: Error: address not in base region, %p\n", __FUNCTION__, (void *)fault_addr);
+	return PAGE_SIZE_4KB;
+    }
+
+    if (pg_next_reg->flags.base == 1) {
+	page_size = req_size; // State A
+	PrintDebug("%s: base region [%p,%p) contains page.\n", __FUNCTION__,
+		   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end);
+    } else {
+#if 0 // State B/C and D optimization
+	if ((pg_next_reg->guest_end >= pg_end) &&
+	    ((pg_next_reg->guest_start >= pg_end) || (pg_next_reg->guest_start <= pg_start))) {
+	    page_size = req_size;
+	}
+
+	PrintDebug("%s: region [%p,%p) %s partially overlap with page\n", __FUNCTION__,
+		   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
+		   (page_size == req_size) ? "does not" : "does");
+
+#else // State B/C
+	if (pg_next_reg->guest_start >= pg_end) {
+
+	    page_size = req_size;
+	}
+
+	PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__,
+		   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
+		   (page_size == req_size) ? "does not" : "does");
+
+#endif
+    }
+
+    return page_size;
+}
+
 
 void v3_print_mem_map(struct v3_vm_info * vm) {
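The region test above reduces to a small decision procedure: the requested size is granted when the page lies entirely inside the base region (state A), or when the next hooked region starts at or beyond the page's end (state B); any overlap forces a 4KB fallback (state C). A self-contained illustration follows, with a toy `struct region` standing in for `struct v3_mem_region` and the region lookup replaced by a plain argument.

#include <stdint.h>
#include <stdio.h>

typedef uintptr_t addr_t;

#define PAGE_SIZE_4KB (4096U)
#define PAGE_SIZE_2MB (2U * 1024 * 1024)

/* Toy stand-in for struct v3_mem_region: just the fields the check reads. */
struct region {
    addr_t guest_start;
    addr_t guest_end;
    int    is_base;  /* flags.base in the real structure */
};

/* Decide whether the 2MB page ending at pg_end may be mapped large, given
 * the next region at or after the page's start (NULL if there is none). */
static uint32_t max_page_size(struct region * next, addr_t pg_end) {
    if (next == NULL) {
        return PAGE_SIZE_4KB;                /* address not in a base region */
    }
    if (next->is_base) {
        return PAGE_SIZE_2MB;                /* state A */
    }
    return (next->guest_start >= pg_end) ? PAGE_SIZE_2MB   /* state B */
                                         : PAGE_SIZE_4KB;  /* state C */
}

int main(void) {
    addr_t pg_start = 0x40200000UL;
    addr_t pg_end   = pg_start + PAGE_SIZE_2MB;

    struct region apic     = { 0x40250000UL, 0x40251000UL, 0 }; /* overlaps page  */
    struct region far_hook = { 0x50000000UL, 0x50001000UL, 0 }; /* beyond pg_end  */

    printf("overlapping hook -> %u KB\n", (unsigned)(max_page_size(&apic, pg_end) / 1024));
    printf("distant hook     -> %u KB\n", (unsigned)(max_page_size(&far_hook, pg_end) / 1024));
    return 0;
}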
"does not" : "does"); + +#else // State B/C + if (pg_next_reg->guest_start >= pg_end) { + + page_size = req_size; + } + + PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__, + (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end, + (page_size == req_size) ? "does not" : "does"); + +#endif + } + + return page_size; +} void v3_print_mem_map(struct v3_vm_info * vm) { diff --git a/utils/guest_creator/default.xml b/utils/guest_creator/default.xml index db20b2c..3e60cd6 100644 --- a/utils/guest_creator/default.xml +++ b/utils/guest_creator/default.xml @@ -3,16 +3,17 @@ - 256 + 256 enable VTLB + 100