diff --git a/palacios/src/palacios/vmm_direct_paging_64.h b/palacios/src/palacios/vmm_direct_paging_64.h
index c4c41e3..baae5d5 100644
--- a/palacios/src/palacios/vmm_direct_paging_64.h
+++ b/palacios/src/palacios/vmm_direct_paging_64.h
@@ -29,9 +29,7 @@
 // Reference: AMD Software Developer Manual Vol.2 Ch.5 "Page Translation and Protection"
 
-static inline int handle_passthrough_pagefault_64(struct guest_info * info,
-						  addr_t fault_addr,
-						  pf_error_t error_code) {
+static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
     pml4e64_t * pml = NULL;
     pdpe64_t * pdpe = NULL;
     pde64_t * pde = NULL;
@@ -44,79 +42,30 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
     int pde_index = PDE64_INDEX(fault_addr);
     int pte_index = PTE64_INDEX(fault_addr);
 
-    struct v3_mem_region * region = v3_get_mem_region(info->vm_info, info->cpu_id, fault_addr);
-    struct v3_mem_region * base_reg = &(info->vm_info->mem_map.base_region);
-
-    /* If the guest has been configured for 2MiB pages, then we must check for hooked regions of
-     * memory which may overlap with the 2MiB page containing the faulting address (due to
-     * potentially differing access policies in place for e.g. i/o devices and APIC). A 2MiB page
-     * can be used if a) no region overlaps the page [or b) a region does overlap but fully contains
-     * the page]. The [bracketed] text pertains to the #if 0'd code below, state D. TODO modify this
-     * note if someone decides to enable this optimization. It can be tested with the SeaStar
-     * mapping.
-     *
-     * Examples: (CAPS regions are returned by v3_get_next_mem_region; state A returns the base reg)
-     *
-     *    |region| |region|                               2MiB mapped (state A)
-     *                   |reg|  |REG|                     2MiB mapped (state B)
-     *   |region|                   |reg| |REG| |region|  4KiB mapped (state C)
-     *       |reg|  |reg|  |--REGION---|                  [2MiB mapped (state D)]
-     * |--------------------------------------------|     RAM
-     *                             ^                      fault addr
-     * |----|----|----|----|----|page|----|----|----|     2MB pages
-     *                           >>>>>>>>>>>>>>>>>>>>     search space
-     */
-    addr_t pg_start = 0UL, pg_end = 0UL; // 2MiB page containing the faulting address
-    struct v3_mem_region * pg_next_reg = NULL; // next immediate mem reg after page start addr
-    bool use_large_page = false;
+    struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->cpu_id, fault_addr);
+    int page_size = PAGE_SIZE_4KB;
 
     if (region == NULL) {
	PrintError("%s: invalid region, addr=%p\n", __FUNCTION__, (void *)fault_addr);
	return -1;
     }
 
-    // set use_large_page here
-    if (info->vm_info->paging_size == PAGING_2MB) {
-
-	// guest page maps to a host page + offset (so when we shift, it aligns with a host page)
-	pg_start = PAGE_ADDR_2MB(fault_addr);
-	pg_end = (pg_start + PAGE_SIZE_2MB);
-
-	PrintDebug("%s: page [%p,%p) contains address\n", __FUNCTION__, (void *)pg_start, (void *)pg_end);
-
-	pg_next_reg = v3_get_next_mem_region(info->vm_info, info->cpu_id, pg_start);
-
-	if (pg_next_reg == NULL) {
-	    PrintError("%s: Error: address not in base region, %p\n", __FUNCTION__, (void *)fault_addr);
-	    return -1;
-	}
-
-	if ((pg_next_reg->guest_start == base_reg->guest_start) &&
-	    (pg_next_reg->guest_end == base_reg->guest_end)) { // next region == base region
-	    use_large_page = 1; // State A
-	} else {
-#if 0       // State B/C and D optimization
-	    use_large_page = (pg_next_reg->guest_end >= pg_end) &&
-		((pg_next_reg->guest_start >= pg_end) || (pg_next_reg->guest_start <= pg_start));
-	    PrintDebug("%s: region [%p,%p) %s partial overlap with page\n", __FUNCTION__,
-		       (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
-		       (use_large_page ? "does not have" : "has"));
-#else       // State B/C
-	    use_large_page = (pg_next_reg->guest_start >= pg_end);
-	    PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__,
-		       (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
-		       (use_large_page ? "does not have" : "has"));
-#endif
-	}
+    /* Check if:
+     * 1. the guest is configured to use large pages, and
+     * 2. the memory region can be referenced by a large page
+     */
+    if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
+	page_size = v3_get_max_page_size(core, fault_addr, LONG);
     }
 
-    PrintDebug("%s: Address gets a 2MiB page? %s\n", __FUNCTION__, (use_large_page ?
-	       "yes" : "no"));
+    PrintDebug("Using page size of %dKB\n", page_size / 1024);
+
 
     // Lookup the correct PML address based on the PAGING MODE
-    if (info->shdw_pg_mode == SHADOW_PAGING) {
-	pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
+    if (core->shdw_pg_mode == SHADOW_PAGING) {
+	pml = CR3_TO_PML4E64_VA(core->ctrl_regs.cr3);
     } else {
-	pml = CR3_TO_PML4E64_VA(info->direct_map_pt);
+	pml = CR3_TO_PML4E64_VA(core->direct_map_pt);
     }
 
     //Fix up the PML entry
@@ -128,7 +77,7 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
	pml[pml_index].writable = 1;
	pml[pml_index].user_page = 1;
 
-	pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
+	pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
     } else {
	pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
     }
@@ -142,21 +91,21 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
	pdpe[pdpe_index].writable = 1;
	pdpe[pdpe_index].user_page = 1;
 
-	pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
+	pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
     } else {
	pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
     }
 
     // Fix up the 2MiB PDE and exit here
-    if (use_large_page) {
-
+    if (page_size == PAGE_SIZE_2MB) {
	pde2mb = (pde64_2MB_t *)pde; // all but these two lines are the same for PTE
	pde2mb[pde_index].large_page = 1;
 
	if (pde2mb[pde_index].present == 0) {
	    pde2mb[pde_index].user_page = 1;
 
-	    if ((region->flags.alloced == 1) && (region->flags.read == 1)) {
+	    if ( (region->flags.alloced == 1) &&
+		 (region->flags.read == 1)) {
		// Full access
		pde2mb[pde_index].present = 1;
@@ -166,25 +115,28 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
		pde2mb[pde_index].writable = 0;
	    }
 
-	    if (v3_gpa_to_hpa(info, fault_addr, &host_addr) == -1) {
+	    if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
		PrintError("Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
		return -1;
	    }
 
	    pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
	} else {
-	    return region->unhandled(info, fault_addr, fault_addr, region, error_code);
+	    return region->unhandled(core, fault_addr, fault_addr, region, error_code);
	}
     } else {
-	// We fix all permissions on the first pass,
+	// We fix all permissions on the first pass,
	// so we only get here if it's an unhandled exception
-	return region->unhandled(info, fault_addr, fault_addr, region, error_code);
+	return region->unhandled(core, fault_addr, fault_addr, region, error_code);
     }
-    }
 
-    // Continue with the 4KiB page heirarchy
+	// All done
+	return 0;
+    }
+
+    // Continue with the 4KiB page hierarchy
+
     // Fix up the PDE entry
     if (pde[pde_index].present == 0) {
	pte = (pte64_t *)create_generic_pt_page();
@@ -198,7 +150,6 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
	pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
     }
 
-
     // Fix up the PTE entry
     if (pte[pte_index].present == 0) {
	pte[pte_index].user_page = 1;
@@ -214,26 +165,26 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * info,
	    pte[pte_index].writable = 0;
	}
 
-	if (v3_gpa_to_hpa(info, fault_addr, &host_addr) == -1) {
+	if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
	    PrintError("Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
	    return -1;
	}
 
	pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
     } else {
-	return region->unhandled(info, fault_addr,
-				 fault_addr, region, error_code);
+	return region->unhandled(core, fault_addr, fault_addr, region, error_code);
     }
     } else {
	// We fix all permissions on the first pass,
	// so we only get here if it's an unhandled exception
-	return region->unhandled(info, fault_addr, fault_addr, region, error_code);
+	return region->unhandled(core, fault_addr, fault_addr, region, error_code);
     }
 
     return 0;
 }
 
-static inline int invalidate_addr_64(struct guest_info * info, addr_t inv_addr) {
+static inline int invalidate_addr_64(struct guest_info * core, addr_t inv_addr) {
     pml4e64_t * pml = NULL;
     pdpe64_t * pdpe = NULL;
     pde64_t * pde = NULL;
@@ -251,10 +202,10 @@ static inline int invalidate_addr_64(struct guest_info * info, addr_t inv_addr)
 
     // Lookup the correct PDE address based on the PAGING MODE
-    if (info->shdw_pg_mode == SHADOW_PAGING) {
-	pml = CR3_TO_PML4E64_VA(info->ctrl_regs.cr3);
+    if (core->shdw_pg_mode == SHADOW_PAGING) {
+	pml = CR3_TO_PML4E64_VA(core->ctrl_regs.cr3);
     } else {
-	pml = CR3_TO_PML4E64_VA(info->direct_map_pt);
+	pml = CR3_TO_PML4E64_VA(core->direct_map_pt);
     }
 
     if (pml[pml_index].present == 0) {
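
The heart of this patch is the removal of the hand-rolled 2MiB overlap test (states A-D in the deleted comment) in favor of a single v3_get_max_page_size(core, fault_addr, LONG) call, whose result is then compared against PAGE_SIZE_2MB when the PDE is fixed up. For reference, below is a minimal standalone sketch of the state A/B/C decision that the deleted lines implemented; struct mem_region, pick_page_size, and the main() driver are illustrative stand-ins rather than Palacios APIs, and the real v3_get_max_page_size may weigh additional factors.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_4KB (1ULL << 12)
#define PAGE_SIZE_2MB (1ULL << 21)
#define PAGE_ADDR_2MB(x) ((x) & ~(PAGE_SIZE_2MB - 1)) /* align down to a 2MiB boundary */

/* Illustrative stand-in for struct v3_mem_region: only the guest-physical
 * bounds matter for the overlap test. */
struct mem_region {
    uint64_t guest_start;
    uint64_t guest_end;   /* exclusive */
};

/* Sketch of the retired inline test. 'next' plays the role of the region
 * v3_get_next_mem_region() returned for the start of the 2MiB page, and
 * 'base' the role of vm_info->mem_map.base_region. */
static uint64_t pick_page_size(uint64_t fault_addr,
                               const struct mem_region * next,
                               const struct mem_region * base) {
    uint64_t pg_start = PAGE_ADDR_2MB(fault_addr);
    uint64_t pg_end = pg_start + PAGE_SIZE_2MB;

    /* State A: the page lies entirely inside the base RAM region. */
    if ((next->guest_start == base->guest_start) &&
        (next->guest_end == base->guest_end)) {
        return PAGE_SIZE_2MB;
    }

    /* State B: the nearest hooked region starts at or beyond the end of
     * the page, so nothing splits the page and a 2MiB mapping is safe. */
    if (next->guest_start >= pg_end) {
        return PAGE_SIZE_2MB;
    }

    /* State C: a hooked region overlaps the page; fall back to 4KiB. */
    return PAGE_SIZE_4KB;
}

int main(void) {
    struct mem_region base = { 0x0, 0x40000000 };        /* 1GiB of guest RAM */
    struct mem_region apic = { 0xFEE00000, 0xFEE01000 }; /* a hooked device page */

    /* A fault in plain RAM is state A; a fault in the 2MiB page that also
     * holds the hooked APIC page is state C. */
    printf("RAM fault:  %lluKB page\n",
           (unsigned long long)(pick_page_size(0x00200123ULL, &base, &base) / 1024));
    printf("APIC fault: %lluKB page\n",
           (unsigned long long)(pick_page_size(0xFEE00040ULL, &apic, &base) / 1024));
    return 0;
}

The sketch keeps only what the deleted code needed for the decision: the bounds of the 2MiB page around the fault and the bounds of the nearest region at or above the page start.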