Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


more work on the memory system
[palacios.git] / palacios / src / geekos / vmm_paging.c
index 5daea0f..d73d0f1 100644 (file)
@@ -2,22 +2,23 @@
 
 #include <geekos/vmm.h>
 
+#include <geekos/vm_guest_mem.h>
 
 
 extern struct vmm_os_hooks * os_hooks;
 
-void delete_page_tables_pde32(vmm_pde_t * pde) {
+void delete_page_tables_pde32(pde32_t * pde) {
   int i, j;
 
   if (pde == NULL) { 
     return;
   }
 
-  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES); i++) {
+  for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
     if (pde[i].present) {
-      vmm_pte_t * pte = (vmm_pte_t *)(pde[i].pt_base_addr << PAGE_POWER);
+      pte32_t * pte = (pte32_t *)(pde[i].pt_base_addr << PAGE_POWER);
       
-      for (j = 0; (j < MAX_PAGE_TABLE_ENTRIES); j++) {
+      for (j = 0; (j < MAX_PTE32_ENTRIES); j++) {
        if ((pte[j].present)) {
          os_hooks->free_page((void *)(pte[j].page_base_addr << PAGE_POWER));
        }
@@ -31,225 +32,25 @@ void delete_page_tables_pde32(vmm_pde_t * pde) {
 }
 
 
-int init_shadow_page_state(shadow_page_state_t * state) {
-  state->guest_mode = PDE32;
-  state->shadow_mode = PDE32;
-  
-  state->guest_cr3.r_reg = 0;
-  state->shadow_cr3.r_reg = 0;
-
-  return 0;
-}
-  
-
-int wholesale_update_shadow_page_state(shadow_page_state_t * state, shadow_map_t * mem_map) {
-  unsigned i, j;
-  vmm_pde_t * guest_pde;
-  vmm_pde_t * shadow_pde;
-
-
-  // For now, we'll only work with PDE32
-  if (state->guest_mode != PDE32) { 
-    return -1;
-  }
-
-
-  
-  shadow_pde = (vmm_pde_t *)(CR3_TO_PDE(state->shadow_cr3.e_reg.low));  
-  guest_pde = (vmm_pde_t *)(os_hooks->paddr_to_vaddr((void*)CR3_TO_PDE(state->guest_cr3.e_reg.low)));
-
-  // Delete the current page table
-  delete_page_tables_pde32(shadow_pde);
-
-  shadow_pde = os_hooks->allocate_pages(1);
-
-
-  state->shadow_cr3.e_reg.low = (addr_t)shadow_pde;
-
-  state->shadow_mode = PDE32;
-
-  
-  for (i = 0; i < MAX_PAGE_DIR_ENTRIES; i++) { 
-    shadow_pde[i] = guest_pde[i];
-
-    // The shadow can be identical to the guest if it's not present
-    if (!shadow_pde[i].present) { 
-      continue;
-    }
-
-    if (shadow_pde[i].large_pages) { 
-      // large page - just map it through shadow map to generate its physical location
-      addr_t guest_addr = PAGE_ADDR(shadow_pde[i].pt_base_addr);
-      addr_t host_addr;
-      shadow_region_t * ent;
-
-      ent = get_shadow_region_by_addr(mem_map, guest_addr);
-      
-      if (!ent) { 
-       // FIXME Panic here - guest is trying to map to physical memory
-       // it does not own in any way!
-       return -1;
-      }
-
-      // FIXME Bounds check here to see if it's trying to trick us
-      
-      switch (ent->host_type) { 
-      case HOST_REGION_PHYSICAL_MEMORY:
-       // points into currently allocated physical memory, so we just
-       // set up the shadow to point to the mapped location
-       if (guest_paddr_to_host_paddr(ent, guest_addr, &host_addr)) { 
-         // Panic here
-         return -1;
-       }
-
-       shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(host_addr);
-       // FIXME set vmm_info bits here
-       break;
-      case HOST_REGION_UNALLOCATED:
-       // points to physical memory that is *allowed* but that we
-       // have not yet allocated.  We mark as not present and set a
-       // bit to remind us to allocate it later
-       shadow_pde[i].present = 0;
-       // FIXME Set vminfo bits here so that we know that we will be
-       // allocating it later
-       break;
-      case HOST_REGION_NOTHING:
-       // points to physical memory that is NOT ALLOWED.   
-       // We will mark it as not present and set a bit to remind
-       // us that it's bad later and insert a GPF then
-       shadow_pde[i].present = 0;
-       break;
-      case HOST_REGION_MEMORY_MAPPED_DEVICE:
-      case HOST_REGION_REMOTE:
-      case HOST_REGION_SWAPPED:
-      default:
-       // Panic.  Currently unhandled
-       return -1;
-       break;
-      }
-    } else {
-      vmm_pte_t * guest_pte;
-      vmm_pte_t * shadow_pte;
-      addr_t guest_addr;
-      addr_t guest_pte_host_addr;
-      shadow_region_t * ent;
-
-      // small page - set PDE and follow down to the child table
-      shadow_pde[i] = guest_pde[i];
-
-      guest_addr = PAGE_ADDR(guest_pde[i].pt_base_addr);
-
-      // Allocate a new second level page table for the shadow
-      shadow_pte = os_hooks->allocate_pages(1);
-
-      // make our first level page table in the shadow point to it
-      shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(shadow_pte);
-      
-      ent = get_shadow_region_by_addr(mem_map, guest_addr);
-      
-
-      /* JRL: This is bad.... */
-      // For now the guest Page Table must always be mapped to host physical memory
-      /* If we swap out a page table or if it isn't present for some reason, this turns real ugly */
-
-      if ((!ent) || (ent->host_type != HOST_REGION_PHYSICAL_MEMORY)) { 
-       // FIXME Panic here - guest is trying to map to physical memory
-       // it does not own in any way!
-       return -1;
-      }
-
-      // Address of the relevant second level page table in the guest
-      if (guest_paddr_to_host_paddr(ent, guest_addr, &guest_pte_host_addr)) { 
-       // Panic here
-       return -1;
-      }
-
-
-      // host_addr now contains the host physical address for the guest's 2nd level page table
-      // Now we transform it to relevant virtual address
-      guest_pte = os_hooks->paddr_to_vaddr((void *)guest_pte_host_addr);
-
-      // Now we walk through the second level guest page table
-      // and clone it into the shadow
-      for (j = 0; j < MAX_PAGE_TABLE_ENTRIES; j++) { 
-       shadow_pte[j] = guest_pte[j];
-
-       addr_t guest_addr = PAGE_ADDR(shadow_pte[j].page_base_addr);
-       
-       shadow_region_t * ent;
-
-       ent = get_shadow_region_by_addr(mem_map, guest_addr);
-      
-       if (!ent) { 
-         // FIXME Panic here - guest is trying to map to physical memory
-         // it does not own in any way!
-         return -1;
-       }
-
-       switch (ent->host_type) { 
-       case HOST_REGION_PHYSICAL_MEMORY:
-         {
-           addr_t host_addr;
-           
-           // points into currently allocated physical memory, so we just
-           // set up the shadow to point to the mapped location
-           if (guest_paddr_to_host_paddr(ent, guest_addr, &host_addr)) { 
-             // Panic here
-             return -1;
-           }
-           
-           shadow_pte[j].page_base_addr = PAGE_ALIGNED_ADDR(host_addr);
-           // FIXME set vmm_info bits here
-           break;
-         }
-       case HOST_REGION_UNALLOCATED:
-         // points to physical memory that is *allowed* but that we
-         // have not yet allocated.  We mark as not present and set a
-         // bit to remind us to allocate it later
-         shadow_pte[j].present = 0;
-         // FIXME Set vminfo bits here so that we know that we will be
-         // allocating it later
-         break;
-       case HOST_REGION_NOTHING:
-         // points to physical memory that is NOT ALLOWED.   
-         // We will mark it as not present and set a bit to remind
-         // us that it's bad later and insert a GPF then
-         shadow_pte[j].present = 0;
-         break;
-       case HOST_REGION_MEMORY_MAPPED_DEVICE:
-       case HOST_REGION_REMOTE:
-       case HOST_REGION_SWAPPED:
-       default:
-         // Panic.  Currently unhandled
-         return -1;
-       break;
-       }
-      }
-    }
-  }
-  return 0;
-}
-      
-
-
 
 /* We generate a page table to correspond to a given memory layout
  * pulling pages from the mem_list when necessary
  * If there are any gaps in the layout, we add them as unmapped pages
  */
-vmm_pde_t * create_passthrough_pde32_pts(shadow_map_t * map) {
+pde32_t * create_passthrough_pde32_pts(guest_info_t * guest_info) {
   ullong_t current_page_addr = 0;
   int i, j;
+  shadow_map_t * map = guest_info->mem_map;
 
 
-  vmm_pde_t * pde = os_hooks->allocate_pages(1);
+  pde32_t * pde = os_hooks->allocate_pages(1);
 
-  for (i = 0; i < MAX_PAGE_DIR_ENTRIES; i++) {
+  for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
     int pte_present = 0;
-    vmm_pte_t * pte = os_hooks->allocate_pages(1);
+    pte32_t * pte = os_hooks->allocate_pages(1);
     
 
-    for (j = 0; j < MAX_PAGE_TABLE_ENTRIES; j++) {
+    for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
       shadow_region_t * region = get_shadow_region_by_addr(map, current_page_addr);
 
       if (!region || 
@@ -277,7 +78,7 @@ vmm_pde_t * create_passthrough_pde32_pts(shadow_map_t * map) {
        pte[j].global_page = 0;
        pte[j].vmm_info = 0;
 
-       if (guest_paddr_to_host_paddr(region, current_page_addr, &host_addr) == -1) {
+       if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
          // BIG ERROR
          // PANIC
          return NULL;
@@ -323,8 +124,54 @@ vmm_pde_t * create_passthrough_pde32_pts(shadow_map_t * map) {
 
 
 
+/* We can't do a full lookup because we don't know what context the page tables are in...
+ * The entry addresses could be pointing to either guest physical memory or host physical memory
+ * Instead we just return the entry address, and a flag to show if it points to a pte or a large page...
+ */
+pde32_entry_type_t pde32_lookup(pde32_t * pde, addr_t addr, addr_t * entry) {
+  pde32_t * pde_entry = pde[PDE32_INDEX(addr)];
+
+  if (!pde_entry->present) {
+    *entry = 0;
+    return NOT_PRESENT;
+  } else  {
+    *entry = PAGE_ADDR(pde_entry->pt_base_addr);
+    
+    if (pde_entry->large_pages) {
+      *entry += PAGE_OFFSET(addr);
+      return LARGE_PAGE;
+    } else {
+      return PTE32;
+    }
+  }  
+  return NOT_PRESENT;
+}
+
+
+int pte32_lookup(pte32_t * pte, addr_t addr, addr_t * entry) {
+  pte32_t * pte_entry = pte[PTE32_INDEX(addr)];
+
+  if (!pte_entry->present) {
+    *entry = 0;
+    return -1;
+  } else {
+    *entry = PAGE_ADDR(pte_entry->page_base_addr);
+    *entry += PAGE_OFFSET(addr);
+    return 0;
+  }
 
-void PrintPDE(void * virtual_address, vmm_pde_t * pde)
+  return -1;
+}
+
+
+
+
+
+
+
+
+
+void PrintPDE32(void * virtual_address, pde32_t * pde)
 {
   PrintDebug("PDE %p -> %p : present=%x, flags=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
              virtual_address,
@@ -338,7 +185,7 @@ void PrintPDE(void * virtual_address, vmm_pde_t * pde)
              pde->vmm_info);
 }
   
-void PrintPTE(void * virtual_address, vmm_pte_t * pte)
+void PrintPTE32(void * virtual_address, pte32_t * pte)
 {
   PrintDebug("PTE %p -> %p : present=%x, flags=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
              virtual_address,
@@ -354,23 +201,23 @@ void PrintPTE(void * virtual_address, vmm_pte_t * pte)
 
 
 
-void PrintPD(vmm_pde_t * pde)
+void PrintPD32(pde32_t * pde)
 {
   int i;
 
   PrintDebug("Page Directory at %p:\n", pde);
-  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES) && pde[i].present; i++) { 
-    PrintPDE((void*)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), &(pde[i]));
+  for (i = 0; (i < MAX_PDE32_ENTRIES) && pde[i].present; i++) { 
+    PrintPDE32((void*)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
   }
 }
 
-void PrintPT(void * starting_address, vmm_pte_t * pte) 
+void PrintPT32(void * starting_address, pte32_t * pte) 
 {
   int i;
 
   PrintDebug("Page Table at %p:\n", pte);
-  for (i = 0; (i < MAX_PAGE_TABLE_ENTRIES) && pte[i].present; i++) { 
-    PrintPTE(starting_address + (PAGE_SIZE * i), &(pte[i]));
+  for (i = 0; (i < MAX_PTE32_ENTRIES) && pte[i].present; i++) { 
+    PrintPTE32(starting_address + (PAGE_SIZE * i), &(pte[i]));
   }
 }
 
@@ -378,211 +225,15 @@ void PrintPT(void * starting_address, vmm_pte_t * pte)
 
 
 
-void PrintDebugPageTables(vmm_pde_t * pde)
+void PrintDebugPageTables(pde32_t * pde)
 {
   int i;
   
   PrintDebug("Dumping the pages starting with the pde page at %p\n", pde);
 
-  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES) && pde[i].present; i++) { 
-    PrintPDE((void *)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), &(pde[i]));
-    PrintPT((void *)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), (void *)(pde[i].pt_base_addr << PAGE_POWER));
+  for (i = 0; (i < MAX_PDE32_ENTRIES) && pde[i].present; i++) { 
+    PrintPDE32((void *)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
+    PrintPT32((void *)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), (void *)(pde[i].pt_base_addr << PAGE_POWER));
   }
 }
     
-    
-
-#if 0
-
-pml4e64_t * generate_guest_page_tables_64(vmm_mem_layout_t * layout, vmm_mem_list_t * list) {
-  pml4e64_t * pml = os_hooks->allocate_pages(1);
-  int i, j, k, m;
-  ullong_t current_page_addr = 0;
-  uint_t layout_index = 0;
-  uint_t list_index = 0;
-  ullong_t layout_addr = 0;
-  uint_t num_entries = layout->num_pages;  // The number of pages left in the layout
-
-  for (m = 0; m < MAX_PAGE_MAP_ENTRIES_64; m++ ) {
-    if (num_entries == 0) {
-      pml[m].present = 0;
-      pml[m].writable = 0;
-      pml[m].user = 0;
-      pml[m].pwt = 0;
-      pml[m].pcd = 0;
-      pml[m].accessed = 0;
-      pml[m].reserved = 0;
-      pml[m].zero = 0;
-      pml[m].vmm_info = 0;
-      pml[m].pdp_base_addr_lo = 0;
-      pml[m].pdp_base_addr_hi = 0;
-      pml[m].available = 0;
-      pml[m].no_execute = 0;
-    } else {
-      pdpe64_t * pdpe = os_hooks->allocate_pages(1);
-      
-      pml[m].present = 1;
-      pml[m].writable = 1;
-      pml[m].user = 1;
-      pml[m].pwt = 0;
-      pml[m].pcd = 0;
-      pml[m].accessed = 0;
-      pml[m].reserved = 0;
-      pml[m].zero = 0;
-      pml[m].vmm_info = 0;
-      pml[m].pdp_base_addr_lo = PAGE_ALLIGNED_ADDR(pdpe) & 0xfffff;
-      pml[m].pdp_base_addr_hi = 0;
-      pml[m].available = 0;
-      pml[m].no_execute = 0;
-
-      for (k = 0; k < MAX_PAGE_DIR_PTR_ENTRIES_64; k++) {
-       if (num_entries == 0) {
-         pdpe[k].present = 0;
-         pdpe[k].writable = 0;
-         pdpe[k].user = 0;
-         pdpe[k].pwt = 0;
-         pdpe[k].pcd = 0;
-         pdpe[k].accessed = 0;
-         pdpe[k].reserved = 0;
-         pdpe[k].large_pages = 0;
-         pdpe[k].zero = 0;
-         pdpe[k].vmm_info = 0;
-         pdpe[k].pd_base_addr_lo = 0;
-         pdpe[k].pd_base_addr_hi = 0;
-         pdpe[k].available = 0;
-         pdpe[k].no_execute = 0;
-       } else {
-         pde64_t * pde = os_hooks->allocate_pages(1);
-
-         pdpe[k].present = 1;
-         pdpe[k].writable = 1;
-         pdpe[k].user = 1;
-         pdpe[k].pwt = 0;
-         pdpe[k].pcd = 0;
-         pdpe[k].accessed = 0;
-         pdpe[k].reserved = 0;
-         pdpe[k].large_pages = 0;
-         pdpe[k].zero = 0;
-         pdpe[k].vmm_info = 0;
-         pdpe[k].pd_base_addr_lo = PAGE_ALLIGNED_ADDR(pde) & 0xfffff;
-         pdpe[k].pd_base_addr_hi = 0;
-         pdpe[k].available = 0;
-         pdpe[k].no_execute = 0;
-
-
-
-         for (i = 0; i < MAX_PAGE_DIR_ENTRIES_64; i++) {
-           if (num_entries == 0) { 
-             pde[i].present = 0;
-             pde[i].flags = 0;
-             pde[i].accessed = 0;
-             pde[i].reserved = 0;
-             pde[i].large_pages = 0;
-             pde[i].reserved2 = 0;
-             pde[i].vmm_info = 0;
-             pde[i].pt_base_addr_lo = 0;
-             pde[i].pt_base_addr_hi = 0;
-             pde[i].available = 0;
-             pde[i].no_execute = 0;
-           } else {
-             pte64_t * pte = os_hooks->allocate_pages(1);
-             
-             pde[i].present = 1;
-             pde[i].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
-             pde[i].accessed = 0;
-             pde[i].reserved = 0;
-             pde[i].large_pages = 0;
-             pde[i].reserved2 = 0;
-             pde[i].vmm_info = 0;
-             pde[i].pt_base_addr_lo = PAGE_ALLIGNED_ADDR(pte) & 0xfffff;
-             pde[i].pt_base_addr_hi = 0;
-             pde[i].available = 0;
-             pde[i].no_execute = 0;
-
-             
-             for (j = 0; j < MAX_PAGE_TABLE_ENTRIES_64; j++) {
-               layout_addr = get_mem_layout_addr(layout, layout_index);
-               
-               if ((current_page_addr < layout_addr) || (num_entries == 0)) {
-                 // We have a gap in the layout, fill with unmapped page
-                 pte[j].present = 0;
-                 pte[j].flags = 0;
-                 pte[j].accessed = 0;
-                 pte[j].dirty = 0;
-                 pte[j].pte_attr = 0;
-                 pte[j].global_page = 0;
-                 pte[j].vmm_info = 0;
-                 pte[j].page_base_addr_lo = 0;
-                 pte[j].page_base_addr_hi = 0;
-                 pte[j].available = 0;
-                 pte[j].no_execute = 0;
-
-                 current_page_addr += PAGE_SIZE;
-               } else if (current_page_addr == layout_addr) {
-                 // Set up the Table entry to map correctly to the layout region
-                 layout_region_t * page_region = get_mem_layout_region(layout, layout_addr);
-                 
-                 if (page_region->type == UNMAPPED) {
-                   pte[j].present = 0;
-                   pte[j].flags = 0;
-                 } else {
-                   pte[j].present = 1;
-                   pte[j].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
-                 }         
-                 
-                 pte[j].accessed = 0;
-                 pte[j].dirty = 0;
-                 pte[j].pte_attr = 0;
-                 pte[j].global_page = 0;
-                 pte[j].vmm_info = 0;
-                 pte[j].available = 0;
-                 pte[j].no_execute = 0;
-
-                 if (page_region->type == UNMAPPED) {
-                   pte[j].page_base_addr_lo = 0;
-                   pte[j].page_base_addr_hi = 0;
-                 } else if (page_region->type == SHARED) {
-                   addr_t host_addr = page_region->host_addr + (layout_addr - page_region->start);
-                   
-                   pte[j].page_base_addr_lo = PAGE_ALLIGNED_ADDR(host_addr) & 0xfffff;
-                   pte[j].page_base_addr_hi = 0;
-                   pte[j].vmm_info = SHARED_PAGE;
-                 } else if (page_region->type == GUEST) {
-                   addr_t list_addr =  get_mem_list_addr(list, list_index++);
-                   
-                   if (list_addr == -1) {
-                     // error
-                     // cleanup...
-                     //free_guest_page_tables(pde);
-                     return NULL;
-                   }
-                   PrintDebug("Adding guest page (%x)\n", list_addr);
-                   pte[j].page_base_addr_lo = PAGE_ALLIGNED_ADDR(list_addr) & 0xfffff;
-                   pte[j].page_base_addr_hi = 0;
-
-                   // Reset this when we move over to dynamic page allocation
-                   //      pte[j].vmm_info = GUEST_PAGE;           
-                   pte[j].vmm_info = SHARED_PAGE;
-                 }
-                 
-                 num_entries--;
-                 current_page_addr += PAGE_SIZE;
-                 layout_index++;
-               } else {
-                 // error
-                 PrintDebug("Error creating page table...\n");
-                 // cleanup
-                 //              free_guest_page_tables64(pde);
-                 return NULL;
-               }
-             }
-           }
-         }
-       }
-      }
-    }
-  }
-  return pml;
-}
-
-#endif