Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out in the same way.
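For example, to see which branches exist and then track one of the release branches (the release branch name below is only illustrative; substitute whatever "git branch -r" actually lists), you could run

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2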


added MSR hook framework
palacios/src/palacios/vmm_paging.c
index 127ec45..cdb14f7 100644
@@ -1,5 +1,21 @@
-/* Northwestern University */
-/* (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> */
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
 
 #include <palacios/vmm_paging.h>
 
@@ -10,6 +26,9 @@
 
 
 
+  
+
+
 void delete_page_tables_pde32(pde32_t * pde) {
   int i;//, j;
 
@@ -19,7 +38,9 @@ void delete_page_tables_pde32(pde32_t * pde) {
 
   for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
     if (pde[i].present) {
-      pte32_t * pte = (pte32_t *)(pde[i].pt_base_addr << PAGE_POWER);
+      // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
+      PrintDebug("PTE base addr %x \n", pde[i].pt_base_addr);
+      pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));
 
       /*
        for (j = 0; (j < MAX_PTE32_ENTRIES); j++) {
@@ -28,13 +49,13 @@ void delete_page_tables_pde32(pde32_t * pde) {
        }
        }
       */
-      //PrintDebug("Deleting PTE %d (%x)\n", i, pte);
+      PrintDebug("Deleting PTE %d (%p)\n", i, pte);
       V3_FreePage(pte);
     }
   }
 
-  //  PrintDebug("Deleting PDE (%x)\n", pde);
-  V3_FreePage(pde);
+  PrintDebug("Deleting PDE (%p)\n", pde);
+  V3_FreePage(V3_PAddr(pde));
 }
 
 
@@ -148,16 +169,16 @@ pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t acces
  * pulling pages from the mem_list when necessary
  * If there are any gaps in the layout, we add them as unmapped pages
  */
-pde32_t * create_passthrough_pde32_pts(struct guest_info * guest_info) {
-  ullong_t current_page_addr = 0;
+pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
+  addr_t current_page_addr = 0;
   int i, j;
   struct shadow_map * map = &(guest_info->mem_map);
 
-  pde32_t * pde = V3_AllocPages(1);
+  pde32_t * pde = V3_VAddr(V3_AllocPages(1));
 
   for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
     int pte_present = 0;
-    pte32_t * pte = V3_AllocPages(1);
+    pte32_t * pte = V3_VAddr(V3_AllocPages(1));
     
 
     for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
@@ -208,7 +229,7 @@ pde32_t * create_passthrough_pde32_pts(struct guest_info * guest_info) {
     }
 
     if (pte_present == 0) { 
-      V3_FreePage(pte);
+      V3_FreePage(V3_PAddr(pte));
 
       pde[i].present = 0;
       pde[i].writable = 0;
@@ -232,7 +253,7 @@ pde32_t * create_passthrough_pde32_pts(struct guest_info * guest_info) {
       pde[i].large_page = 0;
       pde[i].global_page = 0;
       pde[i].vmm_info = 0;
-      pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(pte);
+      pde[i].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
     }
 
   }
@@ -241,6 +262,321 @@ pde32_t * create_passthrough_pde32_pts(struct guest_info * guest_info) {
 }
 
 
+/* We generate a page table to correspond to a given memory layout
+ * pulling pages from the mem_list when necessary
+ * If there are any gaps in the layout, we add them as unmapped pages
+ */
+pdpe32pae_t * create_passthrough_pts_PAE32(struct guest_info * guest_info) {
+  addr_t current_page_addr = 0;
+  int i, j, k;
+  struct shadow_map * map = &(guest_info->mem_map);
+
+  pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
+  memset(pdpe, 0, PAGE_SIZE);
+
+  for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
+    int pde_present = 0;
+    pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));
+
+    for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
+
+
+      int pte_present = 0;
+      pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));
+      
+      
+      for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
+       struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
+       
+       if (!region || 
+           (region->host_type == HOST_REGION_HOOK) || 
+           (region->host_type == HOST_REGION_UNALLOCATED) || 
+           (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) || 
+           (region->host_type == HOST_REGION_REMOTE) ||
+           (region->host_type == HOST_REGION_SWAPPED)) {
+         pte[k].present = 0;
+         pte[k].writable = 0;
+         pte[k].user_page = 0;
+         pte[k].write_through = 0;
+         pte[k].cache_disable = 0;
+         pte[k].accessed = 0;
+         pte[k].dirty = 0;
+         pte[k].pte_attr = 0;
+         pte[k].global_page = 0;
+         pte[k].vmm_info = 0;
+         pte[k].page_base_addr = 0;
+         pte[k].rsvd = 0;
+       } else {
+         addr_t host_addr;
+         pte[k].present = 1;
+         pte[k].writable = 1;
+         pte[k].user_page = 1;
+         pte[k].write_through = 0;
+         pte[k].cache_disable = 0;
+         pte[k].accessed = 0;
+         pte[k].dirty = 0;
+         pte[k].pte_attr = 0;
+         pte[k].global_page = 0;
+         pte[k].vmm_info = 0;
+         
+         if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
+           // BIG ERROR
+           // PANIC
+           return NULL;
+         }
+         
+         pte[k].page_base_addr = host_addr >> 12;
+         pte[k].rsvd = 0;
+
+         pte_present = 1;
+       }
+       
+       current_page_addr += PAGE_SIZE;
+      }
+      
+      if (pte_present == 0) { 
+       V3_FreePage(V3_PAddr(pte));
+       
+       pde[j].present = 0;
+       pde[j].writable = 0;
+       pde[j].user_page = 0;
+       pde[j].write_through = 0;
+       pde[j].cache_disable = 0;
+       pde[j].accessed = 0;
+       pde[j].avail = 0;
+       pde[j].large_page = 0;
+       pde[j].global_page = 0;
+       pde[j].vmm_info = 0;
+       pde[j].pt_base_addr = 0;
+       pde[j].rsvd = 0;
+      } else {
+       pde[j].present = 1;
+       pde[j].writable = 1;
+       pde[j].user_page = 1;
+       pde[j].write_through = 0;
+       pde[j].cache_disable = 0;
+       pde[j].accessed = 0;
+       pde[j].avail = 0;
+       pde[j].large_page = 0;
+       pde[j].global_page = 0;
+       pde[j].vmm_info = 0;
+       pde[j].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
+       pde[j].rsvd = 0;
+
+       pde_present = 1;
+      }
+      
+    }
+    
+    if (pde_present == 0) { 
+      V3_FreePage(V3_PAddr(pde));
+      
+      pdpe[i].present = 0;
+      pdpe[i].rsvd = 0;
+      pdpe[i].write_through = 0;
+      pdpe[i].cache_disable = 0;
+      pdpe[i].accessed = 0;
+      pdpe[i].avail = 0;
+      pdpe[i].rsvd2 = 0;
+      pdpe[i].vmm_info = 0;
+      pdpe[i].pd_base_addr = 0;
+      pdpe[i].rsvd3 = 0;
+    } else {
+      pdpe[i].present = 1;
+      pdpe[i].rsvd = 0;
+      pdpe[i].write_through = 0;
+      pdpe[i].cache_disable = 0;
+      pdpe[i].accessed = 0;
+      pdpe[i].avail = 0;
+      pdpe[i].rsvd2 = 0;
+      pdpe[i].vmm_info = 0;
+      pdpe[i].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
+      pdpe[i].rsvd3 = 0;
+    }
+    
+  }
+
+
+  return pdpe;
+}
+
+
+
+
+
+
+pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
+  addr_t current_page_addr = 0;
+  int i, j, k, m;
+  struct shadow_map * map = &(info->mem_map);
+  
+  pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));
+
+  for (i = 0; i < 1; i++) {
+    int pdpe_present = 0;
+    pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));
+
+    for (j = 0; j < 1; j++) {
+      int pde_present = 0;
+      pde64_t * pde = V3_VAddr(V3_AllocPages(1));
+
+      for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
+       int pte_present = 0;
+       pte64_t * pte = V3_VAddr(V3_AllocPages(1));
+
+
+       for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
+         struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
+         
+
+         
+         if (!region || 
+             (region->host_type == HOST_REGION_HOOK) || 
+             (region->host_type == HOST_REGION_UNALLOCATED) || 
+             (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) || 
+             (region->host_type == HOST_REGION_REMOTE) ||
+             (region->host_type == HOST_REGION_SWAPPED)) {
+           pte[m].present = 0;
+           pte[m].writable = 0;
+           pte[m].user_page = 0;
+           pte[m].write_through = 0;
+           pte[m].cache_disable = 0;
+           pte[m].accessed = 0;
+           pte[m].dirty = 0;
+           pte[m].pte_attr = 0;
+           pte[m].global_page = 0;
+           pte[m].vmm_info = 0;
+           pte[m].page_base_addr = 0;
+         } else {
+           addr_t host_addr;
+           pte[m].present = 1;
+           pte[m].writable = 1;
+           pte[m].user_page = 1;
+           pte[m].write_through = 0;
+           pte[m].cache_disable = 0;
+           pte[m].accessed = 0;
+           pte[m].dirty = 0;
+           pte[m].pte_attr = 0;
+           pte[m].global_page = 0;
+           pte[m].vmm_info = 0;
+           
+           if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
+             // BIG ERROR
+             // PANIC
+             return NULL;
+           }
+
+           pte[m].page_base_addr = PTE64_BASE_ADDR(host_addr);
+
+           //PrintPTE64(current_page_addr, &(pte[m]));
+
+           pte_present = 1;      
+         }
+
+
+
+
+         current_page_addr += PAGE_SIZE;
+       }
+       
+       if (pte_present == 0) {
+         V3_FreePage(V3_PAddr(pte));
+
+         pde[k].present = 0;
+         pde[k].writable = 0;
+         pde[k].user_page = 0;
+         pde[k].write_through = 0;
+         pde[k].cache_disable = 0;
+         pde[k].accessed = 0;
+         pde[k].reserved = 0;
+         pde[k].large_page = 0;
+         //pde[k].global_page = 0;
+         pde[k].vmm_info = 0;
+         pde[k].pt_base_addr = 0;
+       } else {
+         pde[k].present = 1;
+         pde[k].writable = 1;
+         pde[k].user_page = 1;
+         pde[k].write_through = 0;
+         pde[k].cache_disable = 0;
+         pde[k].accessed = 0;
+         pde[k].reserved = 0;
+         pde[k].large_page = 0;
+         //pde[k].global_page = 0;
+         pde[k].vmm_info = 0;
+         pde[k].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
+
+         pde_present = 1;
+       }
+      }
+
+      if (pde_present == 0) {
+       V3_FreePage(V3_PAddr(pde));
+       
+       pdpe[j].present = 0;
+       pdpe[j].writable = 0;
+       pdpe[j].user_page = 0;
+       pdpe[j].write_through = 0;
+       pdpe[j].cache_disable = 0;
+       pdpe[j].accessed = 0;
+       pdpe[j].reserved = 0;
+       pdpe[j].large_page = 0;
+       //pdpe[j].global_page = 0;
+       pdpe[j].vmm_info = 0;
+       pdpe[j].pd_base_addr = 0;
+      } else {
+       pdpe[j].present = 1;
+       pdpe[j].writable = 1;
+       pdpe[j].user_page = 1;
+       pdpe[j].write_through = 0;
+       pdpe[j].cache_disable = 0;
+       pdpe[j].accessed = 0;
+       pdpe[j].reserved = 0;
+       pdpe[j].large_page = 0;
+       //pdpe[j].global_page = 0;
+       pdpe[j].vmm_info = 0;
+       pdpe[j].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
+
+
+       pdpe_present = 1;
+      }
+
+    }
+
+    PrintDebug("PML index=%d\n", i);
+
+    if (pdpe_present == 0) {
+      V3_FreePage(V3_PAddr(pdpe));
+      
+      pml[i].present = 0;
+      pml[i].writable = 0;
+      pml[i].user_page = 0;
+      pml[i].write_through = 0;
+      pml[i].cache_disable = 0;
+      pml[i].accessed = 0;
+      pml[i].reserved = 0;
+      //pml[i].large_page = 0;
+      //pml[i].global_page = 0;
+      pml[i].vmm_info = 0;
+      pml[i].pdp_base_addr = 0;
+    } else {
+      pml[i].present = 1;
+      pml[i].writable = 1;
+      pml[i].user_page = 1;
+      pml[i].write_through = 0;
+      pml[i].cache_disable = 0;
+      pml[i].accessed = 0;
+      pml[i].reserved = 0;
+      //pml[i].large_page = 0;
+      //pml[i].global_page = 0;
+      pml[i].vmm_info = 0;
+      pml[i].pdp_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pdpe));
+    }
+  }
+
+  return pml;
+}
+
 
 
 
@@ -248,8 +584,8 @@ pde32_t * create_passthrough_pde32_pts(struct guest_info * guest_info) {
 void PrintPDE32(addr_t virtual_address, pde32_t * pde)
 {
   PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
-            virtual_address,
-            (void *) (pde->pt_base_addr << PAGE_POWER),
+            (void *)virtual_address,
+            (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
             pde->present,
             pde->writable,
             pde->user_page, 
@@ -261,12 +597,53 @@ void PrintPDE32(addr_t virtual_address, pde32_t * pde)
             pde->global_page,
             pde->vmm_info);
 }
+
   
 void PrintPTE32(addr_t virtual_address, pte32_t * pte)
 {
   PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
-            virtual_address,
-            (void*)(pte->page_base_addr << PAGE_POWER),
+            (void *)virtual_address,
+            (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
+            pte->present,
+            pte->writable,
+            pte->user_page,
+            pte->write_through,
+            pte->cache_disable,
+            pte->accessed,
+            pte->dirty,
+            pte->pte_attr,
+            pte->global_page,
+            pte->vmm_info);
+}
+
+
+
+
+
+
+void PrintPDE64(addr_t virtual_address, pde64_t * pde)
+{
+  PrintDebug("PDE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
+            (void *)virtual_address,
+            (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
+            pde->present,
+            pde->writable,
+            pde->user_page, 
+            pde->write_through,
+            pde->cache_disable,
+            pde->accessed,
+            pde->reserved,
+            pde->large_page,
+            0,//pde->global_page,
+            pde->vmm_info);
+}
+
+  
+void PrintPTE64(addr_t virtual_address, pte64_t * pte)
+{
+  PrintDebug("PTE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
+            (void *)virtual_address,
+            (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
             pte->present,
             pte->writable,
             pte->user_page,
@@ -279,6 +656,9 @@ void PrintPTE32(addr_t virtual_address, pte32_t * pte)
             pte->vmm_info);
 }
 
+  
+
+
 
 
 void PrintPD32(pde32_t * pde)
@@ -309,6 +689,8 @@ void PrintPT32(addr_t starting_address, pte32_t * pte)
 
 
 
+
+
 void PrintDebugPageTables(pde32_t * pde)
 {
   int i;
@@ -318,8 +700,107 @@ void PrintDebugPageTables(pde32_t * pde)
   for (i = 0; (i < MAX_PDE32_ENTRIES); i++) { 
     if (pde[i].present) {
       PrintPDE32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
-      PrintPT32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), (pte32_t *)(pde[i].pt_base_addr << PAGE_POWER));
+      PrintPT32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), (pte32_t *)V3_VAddr((void *)(addr_t)(pde[i].pt_base_addr << PAGE_POWER)));
+    }
+  }
+}
+    
+
+
+
+
+
+
+
+void PrintPDPE32PAE(addr_t virtual_address, pdpe32pae_t * pdpe)
+{
+  PrintDebug("PDPE %p -> %p : present=%x, wt=%x, cd=%x, accessed=%x, kernelInfo=%x\n",
+            (void *)virtual_address,
+            (void *)(addr_t) (pdpe->pd_base_addr << PAGE_POWER),
+            pdpe->present,
+            pdpe->write_through,
+            pdpe->cache_disable,
+            pdpe->accessed,
+            pdpe->vmm_info);
+}
+
+void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde)
+{
+  PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
+            (void *)virtual_address,
+            (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
+            pde->present,
+            pde->writable,
+            pde->user_page, 
+            pde->write_through,
+            pde->cache_disable,
+            pde->accessed,
+            pde->large_page,
+            pde->global_page,
+            pde->vmm_info);
+}
+
+  
+void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte)
+{
+  PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
+            (void *)virtual_address,
+            (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
+            pte->present,
+            pte->writable,
+            pte->user_page,
+            pte->write_through,
+            pte->cache_disable,
+            pte->accessed,
+            pte->dirty,
+            pte->pte_attr,
+            pte->global_page,
+            pte->vmm_info);
+}
+
+
+
+
+
+
+void PrintDebugPageTables32PAE(pdpe32pae_t * pdpe)
+{
+  int i, j, k;
+  pde32pae_t * pde;
+  pte32pae_t * pte;
+  addr_t virtual_addr = 0;
+
+  PrintDebug("Dumping the pages starting with the pde page at %p\n", pdpe);
+
+  for (i = 0; (i < MAX_PDPE32PAE_ENTRIES); i++) { 
+
+    if (pdpe[i].present) {
+      pde = (pde32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pdpe[i].pd_base_addr));
+
+      PrintPDPE32PAE(virtual_addr, &(pdpe[i]));
+
+      for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
+
+       if (pde[j].present) {
+         pte = (pte32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pde[j].pt_base_addr));
+
+         PrintPDE32PAE(virtual_addr, &(pde[j]));
+
+         for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
+           if (pte[k].present) {
+             PrintPTE32PAE(virtual_addr, &(pte[k]));
+           }
+
+           virtual_addr += PAGE_SIZE;
+         }
+       } else {
+         virtual_addr += PAGE_SIZE * MAX_PTE32PAE_ENTRIES;
+       }
+      }
+    } else {
+      virtual_addr += PAGE_SIZE * MAX_PDE32PAE_ENTRIES * MAX_PTE32PAE_ENTRIES;
     }
   }
 }
     
+