2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_paging.h>
22 #include <palacios/vmm.h>
24 #include <palacios/vm_guest_mem.h>
28 #define USE_VMM_PAGING_DEBUG
29 // All of the debug functions defined in vmm_paging.h are implemented in this file
30 #include "vmm_paging_debug.h"
31 #undef USE_VMM_PAGING_DEBUG
/*
 * delete_page_tables_32 - release a 32 bit page directory.
 *
 * The visible lines walk all MAX_PDE32_ENTRIES directory entries, turn each
 * entry's pt_base_addr back into a page table pointer, log it, and finally
 * hand the directory page itself to V3_FreePage().
 *
 * NOTE(review): the embedded listing numbers skip (34 -> 41, 47 -> 52), so
 * statements are missing from this view -- presumably the declaration of i,
 * a guard inside the loop and the per-table free. Confirm against the
 * complete file before editing.
 */
34 void delete_page_tables_32(pde32_t * pde) {
41 for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
43 // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
44 PrintDebug("PTE base addr %x \n", pde[i].pt_base_addr);
/*
 * pt_base_addr is shifted left by PAGE_POWER to rebuild an address.
 * NOTE(review): the result is cast straight to a pointer with no V3_VAddr()
 * conversion, while pde is passed through V3_PAddr() before being freed --
 * confirm which address space the directory entries are expected to hold.
 */
45 pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));
47 PrintDebug("Deleting PTE %d (%p)\n", i, pte);
/* The directory page is released last, after the loop over its entries. */
52 PrintDebug("Deleting PDE (%p)\n", pde);
53 V3_FreePage(V3_PAddr(pde));
56 void delete_page_tables_32PAE(pdpe32pae_t * pdpe) {
57 PrintError("Unimplemented function\n");
60 void delete_page_tables_64(pml4e64_t * pml4) {
61 PrintError("Unimplemented function\n");
/*
 * v3_translate_guest_pt_32 - translate a guest virtual address by walking
 * the guest's own 32 bit page tables.
 *
 * info:      guest context handed to guest_pa_to_host_va()
 * guest_cr3: guest CR3 value; CR3_TO_PDE32_PA() extracts the directory address
 * vaddr:     guest virtual address to translate
 * paddr:     output, receives the translated address
 *
 * Every table lives in guest physical memory, so each level is first mapped
 * to a host virtual address before it is indexed.
 *
 * NOTE(review): the return statements and some case labels are not visible
 * in this view (listing numbers skip); the exact return codes cannot be
 * confirmed from here.
 */
65 int v3_translate_guest_pt_32(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
66 addr_t guest_pde_pa = CR3_TO_PDE32_PA(guest_cr3);
67 pde32_t * guest_pde = 0;
68 addr_t guest_pte_pa = 0;
/* Map the guest page directory so the host can read it. */
70 if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t*)&guest_pde) == -1) {
71 PrintError("Could not get virtual address of Guest PDE32 (PA=%p)\n",
72 (void *)guest_pde_pa);
/* Level 1: the directory lookup decides between not present, 4MB page and page table. */
76 switch (pde32_lookup(guest_pde, vaddr, &guest_pte_pa)) {
77 case PT_ENTRY_NOT_PRESENT:
/* For a large page pde32_lookup() has already produced the final address. */
80 case PT_ENTRY_LARGE_PAGE:
81 *paddr = guest_pte_pa;
/* Otherwise guest_pte_pa is the guest physical address of the page table. */
85 pte32_t * guest_pte = NULL;
87 if (guest_pa_to_host_va(info, guest_pte_pa, (addr_t*)&guest_pte) == -1) {
88 PrintError("Could not get virtual address of Guest PTE32 (PA=%p)\n",
89 (void *)guest_pte_pa);
/* Level 2: pte32_lookup() writes the final translation directly into *paddr. */
93 if (pte32_lookup(guest_pte, vaddr, paddr) == PT_ENTRY_NOT_PRESENT) {
/*
 * v3_translate_guest_pt_32pae - translate a guest virtual address by walking
 * the guest's 32 bit PAE page tables (PDPE -> PDE -> PTE).
 *
 * info:      guest context handed to guest_pa_to_host_va()
 * guest_cr3: guest CR3 value; CR3_TO_PDPE32PAE_PA() extracts the top table address
 * vaddr:     guest virtual address to translate
 * paddr:     output, receives the translated address
 *
 * NOTE(review): return statements and some case labels are not visible in
 * this view; the exact return codes cannot be confirmed from here.
 */
105 int v3_translate_guest_pt_32pae(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
106 addr_t guest_pdpe_pa = CR3_TO_PDPE32PAE_PA(guest_cr3);
107 pdpe32pae_t * guest_pdpe = 0;
108 addr_t guest_pde_pa = 0;
/* Map the guest page directory pointer table into host virtual memory. */
110 if (guest_pa_to_host_va(info, guest_pdpe_pa, (addr_t*)&guest_pdpe) == -1) {
111 PrintError("Could not get virtual address of Guest PDPE32PAE (PA=%p)\n",
112 (void *)guest_pdpe_pa);
/* Level 1: PDPE lookup yields the guest physical address of the page directory. */
116 switch (pdpe32pae_lookup(guest_pdpe, vaddr, &guest_pde_pa))
118 case PT_ENTRY_NOT_PRESENT:
123 pde32pae_t * guest_pde = NULL;
124 addr_t guest_pte_pa = 0;
126 if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t *)&guest_pde) == -1) {
127 PrintError("Could not get virtual Address of Guest PDE32PAE (PA=%p)\n",
128 (void *)guest_pde_pa);
/* Level 2: PDE lookup; a 2MB page ends the walk here. */
132 switch (pde32pae_lookup(guest_pde, vaddr, &guest_pte_pa))
134 case PT_ENTRY_NOT_PRESENT:
/* For a large page pde32pae_lookup() has already produced the final address. */
137 case PT_ENTRY_LARGE_PAGE:
138 *paddr = guest_pte_pa;
142 pte32pae_t * guest_pte = NULL;
144 if (guest_pa_to_host_va(info, guest_pte_pa, (addr_t *)&guest_pte) == -1) {
145 PrintError("Could not get virtual Address of Guest PTE32PAE (PA=%p)\n",
146 (void *)guest_pte_pa);
/* Level 3: pte32pae_lookup() writes the final translation directly into *paddr. */
150 if (pte32pae_lookup(guest_pte, vaddr, paddr) == PT_ENTRY_NOT_PRESENT) {
/*
 * v3_translate_guest_pt_64 - translate a guest virtual address by walking
 * the guest's 64 bit page tables (PML4E -> PDPE -> PDE -> PTE).
 *
 * info:      guest context handed to guest_pa_to_host_va()
 * guest_cr3: guest CR3 value; CR3_TO_PML4E64_PA() extracts the top table address
 * vaddr:     guest virtual address to translate
 * paddr:     output, receives the translated address
 *
 * 2MB pages are resolved at the PDE level; a large page at the PDPE level
 * (1GB) is reported as unsupported.
 *
 * NOTE(review): return statements and some case labels are not visible in
 * this view; the exact return codes cannot be confirmed from here.
 */
165 int v3_translate_guest_pt_64(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
166 addr_t guest_pml4_pa = CR3_TO_PML4E64_PA(guest_cr3);
167 pml4e64_t * guest_pml = 0;
168 addr_t guest_pdpe_pa = 0;
/* Map the guest PML4 table into host virtual memory. */
170 if (guest_pa_to_host_va(info, guest_pml4_pa, (addr_t*)&guest_pml) == -1) {
171 PrintError("Could not get virtual address of Guest PML4E64 (PA=%p)\n",
172 (void *)guest_pml4_pa);
/* Level 1: PML4E lookup yields the guest physical address of the PDP table. */
176 switch (pml4e64_lookup(guest_pml, vaddr, &guest_pdpe_pa)) {
177 case PT_ENTRY_NOT_PRESENT:
182 pdpe64_t * guest_pdp = NULL;
183 addr_t guest_pde_pa = 0;
185 if (guest_pa_to_host_va(info, guest_pdpe_pa, (addr_t *)&guest_pdp) == -1) {
186 PrintError("Could not get virtual address of Guest PDPE64 (PA=%p)\n",
187 (void *)guest_pdpe_pa);
/* Level 2: PDPE lookup yields the guest physical address of the page directory. */
191 switch (pdpe64_lookup(guest_pdp, vaddr, &guest_pde_pa)) {
192 case PT_ENTRY_NOT_PRESENT:
195 case PT_ENTRY_LARGE_PAGE:
197 PrintError("1 Gigabyte Pages not supported\n");
201 pde64_t * guest_pde = NULL;
202 addr_t guest_pte_pa = 0;
204 if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t *)&guest_pde) == -1) {
205 PrintError("Could not get virtual address of guest PDE64 (PA=%p)\n",
206 (void *)guest_pde_pa);
/* Level 3: PDE lookup; a 2MB page ends the walk here. */
210 switch (pde64_lookup(guest_pde, vaddr, &guest_pte_pa)) {
211 case PT_ENTRY_NOT_PRESENT:
/* For a large page pde64_lookup() has already produced the final address. */
214 case PT_ENTRY_LARGE_PAGE:
215 *paddr = guest_pte_pa;
219 pte64_t * guest_pte = NULL;
221 if (guest_pa_to_host_va(info, guest_pte_pa, (addr_t *)&guest_pte) == -1) {
222 PrintError("Could not get virtual address of guest PTE64 (PA=%p)\n",
223 (void *)guest_pte_pa);
/* Level 4: pte64_lookup() writes the final translation directly into *paddr. */
227 if (pte64_lookup(guest_pte, vaddr, paddr) == PT_ENTRY_NOT_PRESENT) {
/*
 * v3_translate_host_pt_32 - translate a virtual address through host-side
 * 32 bit page tables.
 *
 * host_cr3: CR3 value; CR3_TO_PDE32_VA() yields the directory as a pointer
 * vaddr:    virtual address to translate
 * paddr:    output, receives the translated address
 *
 * Unlike the guest variants, no guest_pa_to_host_va() mapping step is
 * performed between levels.
 *
 * NOTE(review): return statements and the remaining case label are not
 * visible in this view.
 */
245 int v3_translate_host_pt_32(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
246 pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA(host_cr3);
247 pte32_t * host_pte = 0;
/*
 * host_pte doubles as the addr_t output slot of pde32_lookup().
 * NOTE(review): pde32_lookup() stores BASE_TO_PAGE_ADDR(pt_base_addr) there,
 * and that value is later dereferenced as a pointer by pte32_lookup() without
 * any visible address conversion -- confirm this is valid on the host.
 */
249 switch (pde32_lookup(host_pde, vaddr, (addr_t *)&host_pte)) {
250 case PT_ENTRY_NOT_PRESENT:
/* For a large page the value returned through host_pte is the final address. */
253 case PT_ENTRY_LARGE_PAGE:
254 *paddr = (addr_t)host_pte;
257 if (pte32_lookup(host_pte, vaddr, paddr) == PT_ENTRY_NOT_PRESENT) {
/*
 * v3_translate_host_pt_32pae - host-side translation entry point for 32 bit
 * PAE page tables; same parameter list as v3_translate_host_pt_32().
 *
 * NOTE(review): only the signature is visible in this view; the body
 * (listing lines 267-272) cannot be described from here.
 */
266 int v3_translate_host_pt_32pae(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
/*
 * v3_translate_host_pt_64 - host-side translation entry point for 64 bit
 * page tables; same parameter list as v3_translate_host_pt_32().
 *
 * NOTE(review): only the signature is visible in this view; the body
 * cannot be described from here.
 */
273 int v3_translate_host_pt_64(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
/*
 * PAGE TABLE LOOKUP FUNCTIONS
 *
 * The value written to *entry depends on the return type:
 *   Page not present: *entry = 0
 *   Large page:       *entry = translated physical address (byte granularity)
 *   PTE entry:        *entry is the address of the PTE page
 */

/*
 * 32 bit Page Table lookup functions
 */
299 pt_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
300 pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);
302 if (!pde_entry->present) {
304 return PT_ENTRY_NOT_PRESENT;
305 } else if (pde_entry->large_page) {
306 pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;
308 *entry = BASE_TO_PAGE_ADDR_4MB(large_pde->page_base_addr);
309 *entry += PAGE_OFFSET_4MB(addr);
311 return PT_ENTRY_LARGE_PAGE;
313 *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
314 return PT_ENTRY_PAGE;
/* Takes a virtual address (addr) and returns, through *entry, the physical
 * address that the page table defines for it.
 */
322 pt_entry_type_t pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
323 pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);
325 if (!pte_entry->present) {
327 // PrintDebug("Lookup at non present page (index=%d)\n", PTE32_INDEX(addr));
328 return PT_ENTRY_NOT_PRESENT;
330 *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
331 return PT_ENTRY_PAGE;
/*
 * 32 bit PAE Page Table lookup functions
 */
343 pt_entry_type_t pdpe32pae_lookup(pdpe32pae_t * pdp, addr_t addr, addr_t * entry) {
344 pdpe32pae_t * pdpe_entry = &(pdp[PDPE32PAE_INDEX(addr)]);
346 if (!pdpe_entry->present) {
348 return PT_ENTRY_NOT_PRESENT;
350 *entry = BASE_TO_PAGE_ADDR(pdpe_entry->pd_base_addr);
351 return PT_ENTRY_PAGE;
355 pt_entry_type_t pde32pae_lookup(pde32pae_t * pd, addr_t addr, addr_t * entry) {
356 pde32pae_t * pde_entry = &(pd[PDE32PAE_INDEX(addr)]);
358 if (!pde_entry->present) {
360 return PT_ENTRY_NOT_PRESENT;
361 } else if (pde_entry->large_page) {
362 pde32pae_2MB_t * large_pde = (pde32pae_2MB_t *)pde_entry;
364 *entry = BASE_TO_PAGE_ADDR_2MB(large_pde->page_base_addr);
365 *entry += PAGE_OFFSET_2MB(addr);
367 return PT_ENTRY_LARGE_PAGE;
369 *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
370 return PT_ENTRY_PAGE;
374 pt_entry_type_t pte32pae_lookup(pte32pae_t * pt, addr_t addr, addr_t * entry) {
375 pte32pae_t * pte_entry = &(pt[PTE32PAE_INDEX(addr)]);
377 if (!pte_entry->present) {
379 return PT_ENTRY_NOT_PRESENT;
381 *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
382 return PT_ENTRY_PAGE;
/*
 * 64 bit Page Table lookup functions
 */
393 pt_entry_type_t pml4e64_lookup(pml4e64_t * pml, addr_t addr, addr_t * entry) {
394 pml4e64_t * pml_entry = &(pml[PML4E64_INDEX(addr)]);
396 if (!pml_entry->present) {
398 return PT_ENTRY_NOT_PRESENT;
400 *entry = BASE_TO_PAGE_ADDR(pml_entry->pdp_base_addr);
401 return PT_ENTRY_PAGE;
/*
 * pdpe64_lookup - look up the 64 bit page directory pointer entry for addr.
 *
 * pdp:   PDP table, indexed with PDPE64_INDEX(addr)
 * addr:  virtual address being translated
 * entry: output; for a regular entry it receives the address of the page
 *        directory the entry points to
 *
 * Returns PT_ENTRY_NOT_PRESENT for a missing entry and PT_ENTRY_PAGE for a
 * regular one. A large page (1GB) entry is reported as an error.
 *
 * NOTE(review): the statements following the 1GB error message are not
 * visible in this view, so the value returned on that path cannot be
 * confirmed from here.
 */
405 pt_entry_type_t pdpe64_lookup(pdpe64_t * pdp, addr_t addr, addr_t * entry) {
406 pdpe64_t * pdpe_entry = &(pdp[PDPE64_INDEX(addr)]);
408 if (!pdpe_entry->present) {
410 return PT_ENTRY_NOT_PRESENT;
411 } else if (pdpe_entry->large_page) {
412 PrintError("1 Gigabyte pages not supported\n");
/* Regular entry: hand back the next level table address. */
416 *entry = BASE_TO_PAGE_ADDR(pdpe_entry->pd_base_addr);
417 return PT_ENTRY_PAGE;
421 pt_entry_type_t pde64_lookup(pde64_t * pd, addr_t addr, addr_t * entry) {
422 pde64_t * pde_entry = &(pd[PDE64_INDEX(addr)]);
424 if (!pde_entry->present) {
426 return PT_ENTRY_NOT_PRESENT;
427 } else if (pde_entry->large_page) {
428 pde64_2MB_t * large_pde = (pde64_2MB_t *)pde_entry;
430 *entry = BASE_TO_PAGE_ADDR_2MB(large_pde->page_base_addr);
431 *entry += PAGE_OFFSET_2MB(addr);
433 return PT_ENTRY_LARGE_PAGE;
435 *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
436 return PT_ENTRY_PAGE;
440 pt_entry_type_t pte64_lookup(pte64_t * pt, addr_t addr, addr_t * entry) {
441 pte64_t * pte_entry = &(pt[PTE64_INDEX(addr)]);
443 if (!pte_entry->present) {
445 return PT_ENTRY_NOT_PRESENT;
447 *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
448 return PT_ENTRY_PAGE;
/*
 * can_access_pde32 - check whether an access is permitted by the 32 bit
 * page directory entry covering addr.
 *
 * pde:         page directory, indexed with PDE32_INDEX(addr)
 * addr:        virtual address being accessed
 * access_type: page fault error code; only its write and user bits are read
 *
 * Checks run in a fixed order -- present, then write permission, then user
 * permission -- and the first failing check determines the result.
 *
 * NOTE(review): the return for the "all checks passed" case is not visible
 * in this view.
 */
468 pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
469 pde32_t * entry = &pde[PDE32_INDEX(addr)];
471 if (entry->present == 0) {
472 return PT_ACCESS_NOT_PRESENT;
/* A write to an entry that is not marked writable. */
473 } else if ((entry->writable == 0) && (access_type.write == 1)) {
474 return PT_ACCESS_WRITE_ERROR;
/* A user mode access to an entry that is not marked as a user page. */
475 } else if ((entry->user_page == 0) && (access_type.user == 1)) {
477 return PT_ACCESS_USER_ERROR;
/*
 * can_access_pte32 - check whether an access is permitted by the 32 bit
 * page table entry covering addr.
 *
 * pte:         page table, indexed with PTE32_INDEX(addr)
 * addr:        virtual address being accessed
 * access_type: page fault error code; only its write and user bits are read
 *
 * Checks run in a fixed order -- present, then write permission, then user
 * permission -- and the first failing check determines the result.
 *
 * NOTE(review): the return for the "all checks passed" case is not visible
 * in this view.
 */
484 pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
485 pte32_t * entry = &pte[PTE32_INDEX(addr)];
487 if (entry->present == 0) {
488 return PT_ACCESS_NOT_PRESENT;
/* A write to an entry that is not marked writable. */
489 } else if ((entry->writable == 0) && (access_type.write == 1)) {
490 return PT_ACCESS_WRITE_ERROR;
/* A user mode access to an entry that is not marked as a user page. */
491 } else if ((entry->user_page == 0) && (access_type.user == 1)) {
493 return PT_ACCESS_USER_ERROR;
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
/*
 * create_passthrough_pts_32 - build a 32 bit page directory that maps guest
 * physical addresses onto the host pages backing them.
 *
 * guest_info: guest whose mem_map (shadow map) describes the layout
 *
 * Guest physical addresses are visited page by page starting at 0. One page
 * table is allocated per directory slot and released again if none of its
 * entries ended up present.
 *
 * NOTE(review): many lines are missing from this view (listing numbers
 * skip), including local declarations, the present/writable style field
 * assignments, the else branches, error handling and the return statement.
 * The comments below describe only what is visible.
 */
506 pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
507 addr_t current_page_addr = 0;
509 struct shadow_map * map = &(guest_info->mem_map);
/* Top level directory: a single freshly allocated page. */
511 pde32_t * pde = V3_VAddr(V3_AllocPages(1));
513 for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
/* A page table is allocated up front for every directory slot. */
515 pte32_t * pte = V3_VAddr(V3_AllocPages(1));
518 for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
/* Find the shadow region (if any) covering the current guest physical page. */
519 struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
/* Region types listed here get a cleared, unmapped entry. */
522 (region->host_type == HOST_REGION_HOOK) ||
523 (region->host_type == HOST_REGION_UNALLOCATED) ||
524 (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
525 (region->host_type == HOST_REGION_REMOTE) ||
526 (region->host_type == HOST_REGION_SWAPPED)) {
529 pte[j].user_page = 0;
530 pte[j].write_through = 0;
531 pte[j].cache_disable = 0;
535 pte[j].global_page = 0;
537 pte[j].page_base_addr = 0;
/* Mapped case: user accessible entry pointing at the backing host page. */
542 pte[j].user_page = 1;
543 pte[j].write_through = 0;
544 pte[j].cache_disable = 0;
548 pte[j].global_page = 0;
551 if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
/*
 * NOTE(review): hard-coded shift of 12 here, while the directory entry below
 * uses PAGE_BASE_ADDR() -- presumably equivalent for 4KB pages; confirm.
 */
557 pte[j].page_base_addr = host_addr >> 12;
562 current_page_addr += PAGE_SIZE;
/* No entry in this table was present: give the page back and clear the slot. */
565 if (pte_present == 0) {
566 V3_FreePage(V3_PAddr(pte));
570 pde[i].user_page = 0;
571 pde[i].write_through = 0;
572 pde[i].cache_disable = 0;
575 pde[i].large_page = 0;
576 pde[i].global_page = 0;
578 pde[i].pt_base_addr = 0;
/* Otherwise link the page table into the directory by its physical address. */
582 pde[i].user_page = 1;
583 pde[i].write_through = 0;
584 pde[i].cache_disable = 0;
587 pde[i].large_page = 0;
588 pde[i].global_page = 0;
590 pde[i].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
/*
 * create_passthrough_pts_32PAE - build a 32 bit PAE hierarchy
 * (PDPE -> PDE -> PTE) that maps guest physical addresses onto the host
 * pages backing them.
 *
 * guest_info: guest whose mem_map (shadow map) describes the layout
 *
 * Guest physical addresses are visited page by page starting at 0. Lower
 * level tables are allocated eagerly and released again when none of their
 * entries ended up present.
 *
 * NOTE(review): many lines are missing from this view (listing numbers
 * skip), including local declarations, the present/writable style field
 * assignments, the else branches, error handling and the return statement.
 * The comments below describe only what is visible.
 */
603 pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info) {
604 addr_t current_page_addr = 0;
606 struct shadow_map * map = &(guest_info->mem_map);
/* The top level table is zeroed, so slots the loop never fills stay not present. */
608 pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
609 memset(pdpe, 0, PAGE_SIZE);
611 for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
613 pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));
615 for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
619 pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));
622 for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
/* Find the shadow region (if any) covering the current guest physical page. */
623 struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
/* Region types listed here get a cleared, unmapped entry. */
626 (region->host_type == HOST_REGION_HOOK) ||
627 (region->host_type == HOST_REGION_UNALLOCATED) ||
628 (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
629 (region->host_type == HOST_REGION_REMOTE) ||
630 (region->host_type == HOST_REGION_SWAPPED)) {
633 pte[k].user_page = 0;
634 pte[k].write_through = 0;
635 pte[k].cache_disable = 0;
639 pte[k].global_page = 0;
641 pte[k].page_base_addr = 0;
/* Mapped case: user accessible entry pointing at the backing host page. */
647 pte[k].user_page = 1;
648 pte[k].write_through = 0;
649 pte[k].cache_disable = 0;
653 pte[k].global_page = 0;
656 if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
/*
 * NOTE(review): hard-coded shift of 12 here, while the upper levels use
 * PAGE_BASE_ADDR() -- presumably equivalent for 4KB pages; confirm.
 */
662 pte[k].page_base_addr = host_addr >> 12;
668 current_page_addr += PAGE_SIZE;
/* Empty page table: give the page back and clear the directory slot. */
671 if (pte_present == 0) {
672 V3_FreePage(V3_PAddr(pte));
676 pde[j].user_page = 0;
677 pde[j].write_through = 0;
678 pde[j].cache_disable = 0;
681 pde[j].large_page = 0;
682 pde[j].global_page = 0;
684 pde[j].pt_base_addr = 0;
/* Otherwise link the page table into the directory by its physical address. */
689 pde[j].user_page = 1;
690 pde[j].write_through = 0;
691 pde[j].cache_disable = 0;
694 pde[j].large_page = 0;
695 pde[j].global_page = 0;
697 pde[j].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
/* Empty page directory: give the page back and clear the PDPE slot. */
705 if (pde_present == 0) {
706 V3_FreePage(V3_PAddr(pde));
710 pdpe[i].write_through = 0;
711 pdpe[i].cache_disable = 0;
712 pdpe[i].accessed = 0;
715 pdpe[i].vmm_info = 0;
716 pdpe[i].pd_base_addr = 0;
/* Otherwise link the page directory into the PDPE by its physical address. */
721 pdpe[i].write_through = 0;
722 pdpe[i].cache_disable = 0;
723 pdpe[i].accessed = 0;
726 pdpe[i].vmm_info = 0;
727 pdpe[i].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
/*
 * create_passthrough_pts_64 - build a 64 bit hierarchy
 * (PML4E -> PDPE -> PDE -> PTE) that maps guest physical addresses onto the
 * host pages backing them.
 *
 * info: guest whose mem_map (shadow map) describes the layout
 *
 * Guest physical addresses are visited page by page starting at 0. Lower
 * level tables are allocated eagerly and released again when none of their
 * entries ended up present.
 *
 * NOTE(review): many lines are missing from this view (listing numbers
 * skip), including local declarations, the present/writable style field
 * assignments, the else branches, error handling and the return statement.
 * The comments below describe only what is visible.
 */
742 pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
743 addr_t current_page_addr = 0;
745 struct shadow_map * map = &(info->mem_map);
747 pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));
/*
 * Only the first PML4 slot is populated (loop bound is the literal 1).
 * NOTE(review): no clearing of the remaining pml slots is visible here --
 * confirm they are initialised somewhere.
 */
749 for (i = 0; i < 1; i++) {
750 int pdpe_present = 0;
751 pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));
/*
 * Only the first 20 PDP slots are populated (loop bound is the literal 20).
 * NOTE(review): no clearing of the remaining pdpe slots is visible here --
 * confirm they are initialised somewhere.
 */
753 for (j = 0; j < 20; j++) {
755 pde64_t * pde = V3_VAddr(V3_AllocPages(1));
757 for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
759 pte64_t * pte = V3_VAddr(V3_AllocPages(1));
762 for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
/* Find the shadow region (if any) covering the current guest physical page. */
763 struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
/* Region types listed here get a cleared, unmapped entry. */
768 (region->host_type == HOST_REGION_HOOK) ||
769 (region->host_type == HOST_REGION_UNALLOCATED) ||
770 (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
771 (region->host_type == HOST_REGION_REMOTE) ||
772 (region->host_type == HOST_REGION_SWAPPED)) {
775 pte[m].user_page = 0;
776 pte[m].write_through = 0;
777 pte[m].cache_disable = 0;
781 pte[m].global_page = 0;
783 pte[m].page_base_addr = 0;
/* Mapped case: user accessible entry pointing at the backing host page. */
788 pte[m].user_page = 1;
789 pte[m].write_through = 0;
790 pte[m].cache_disable = 0;
794 pte[m].global_page = 0;
797 if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
803 pte[m].page_base_addr = PAGE_BASE_ADDR(host_addr);
805 //PrintPTE64(current_page_addr, &(pte[m]));
813 current_page_addr += PAGE_SIZE;
/* Empty page table: give the page back and clear the directory slot. */
816 if (pte_present == 0) {
817 V3_FreePage(V3_PAddr(pte));
821 pde[k].user_page = 0;
822 pde[k].write_through = 0;
823 pde[k].cache_disable = 0;
826 pde[k].large_page = 0;
827 //pde[k].global_page = 0;
829 pde[k].pt_base_addr = 0;
/* Otherwise link the page table into the directory by its physical address. */
833 pde[k].user_page = 1;
834 pde[k].write_through = 0;
835 pde[k].cache_disable = 0;
838 pde[k].large_page = 0;
839 //pde[k].global_page = 0;
841 pde[k].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
/* Empty page directory: give the page back and clear the PDPE slot. */
847 if (pde_present == 0) {
848 V3_FreePage(V3_PAddr(pde));
851 pdpe[j].writable = 0;
852 pdpe[j].user_page = 0;
853 pdpe[j].write_through = 0;
854 pdpe[j].cache_disable = 0;
855 pdpe[j].accessed = 0;
857 pdpe[j].large_page = 0;
858 //pdpe[j].global_page = 0;
859 pdpe[j].vmm_info = 0;
860 pdpe[j].pd_base_addr = 0;
/* Otherwise link the page directory in as a writable, user accessible entry. */
863 pdpe[j].writable = 1;
864 pdpe[j].user_page = 1;
865 pdpe[j].write_through = 0;
866 pdpe[j].cache_disable = 0;
867 pdpe[j].accessed = 0;
869 pdpe[j].large_page = 0;
870 //pdpe[j].global_page = 0;
871 pdpe[j].vmm_info = 0;
872 pdpe[j].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
880 PrintDebug("PML index=%d\n", i);
/* Empty PDP table: give the page back and clear the PML4 slot. */
882 if (pdpe_present == 0) {
883 V3_FreePage(V3_PAddr(pdpe));
887 pml[i].user_page = 0;
888 pml[i].write_through = 0;
889 pml[i].cache_disable = 0;
892 //pml[i].large_page = 0;
893 //pml[i].global_page = 0;
895 pml[i].pdp_base_addr = 0;
/* Otherwise link the PDP table into the PML4 by its physical address. */
899 pml[i].user_page = 1;
900 pml[i].write_through = 0;
901 pml[i].cache_disable = 0;
904 //pml[i].large_page = 0;
905 //pml[i].global_page = 0;
907 pml[i].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));