/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_paging.h>

#include <palacios/vmm.h>

#include <palacios/vm_guest_mem.h>


#define USE_VMM_PAGING_DEBUG
// All of the debug functions defined in vmm_paging.h are implemented in this file
#include "vmm_paging_debug.h"
#undef USE_VMM_PAGING_DEBUG
void delete_page_tables_32(pde32_t * pde) {
    int i;

    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
        if (pde[i].present) {
            // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
            PrintDebug("PTE base addr %x\n", pde[i].pt_base_addr);
            pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));
            PrintDebug("Deleting PTE %d (%p)\n", i, pte);
            V3_FreePage(pte);
        }
    }

    PrintDebug("Deleting PDE (%p)\n", pde);
    V3_FreePage(V3_PAddr(pde));
}
void delete_page_tables_32PAE(pdpe32pae_t * pdpe) {
    PrintError("Unimplemented function\n");
}

void delete_page_tables_64(pml4e64_t * pml4) {
    PrintError("Unimplemented function\n");
}
int translate_guest_pt_32(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
    addr_t guest_pde_pa = CR3_TO_PDE32_PA((void *)guest_cr3);
    pde32_t * guest_pde = 0;
    addr_t guest_pte_pa = 0;

    if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t *)&guest_pde) == -1) {
        PrintError("Could not get virtual address of Guest PDE32 (PA=%p)\n",
                   (void *)guest_pde_pa);
        return -1;
    }

    switch (pde32_lookup(guest_pde, vaddr, &guest_pte_pa)) {
        case PT_ENTRY_NOT_PRESENT:
            *paddr = 0;
            return -1;
        case PT_ENTRY_LARGE_PAGE:
            *paddr = guest_pte_pa;
            return 0;
        case PT_ENTRY_PAGE: {
            pte32_t * guest_pte = 0;
            if (guest_pa_to_host_va(info, guest_pte_pa, (addr_t *)&guest_pte) == -1) {
                PrintError("Could not get virtual address of Guest PTE32 (PA=%p)\n",
                           (void *)guest_pte_pa);
                return -1;
            }
            if (pte32_lookup(guest_pte, vaddr, paddr) == -1) {
                return -1;
            }
        }
    }
    return 0;
}
int translate_host_pt_32(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA((void *)host_cr3);
    pte32_t * host_pte = 0;

    switch (pde32_lookup(host_pde, vaddr, (addr_t *)&host_pte)) {
        case PT_ENTRY_NOT_PRESENT:
            return -1;
        case PT_ENTRY_LARGE_PAGE:
            *paddr = (addr_t)host_pte;
            return 0;
        case PT_ENTRY_PAGE:
            if (pte32_lookup(host_pte, vaddr, paddr) == -1) {
                return -1;
            }
    }
    return 0;
}
int translate_host_pt_32pae(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    PrintError("Unimplemented function\n");
    return -1;
}

int translate_host_pt_64(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    PrintError("Unimplemented function\n");
    return -1;
}
/*
 * PAGE TABLE LOOKUP FUNCTIONS
 *
 * The value of entry is a return type:
 * Page not present: *entry = 0
 * Large Page: *entry = translated physical address (byte granularity)
 * PTE entry: *entry is the address of the PTE Page
 */
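/* Illustrative only (not part of the original file): a minimal sketch of how a
 * caller is expected to consume the three return values described above when
 * walking a host-mapped 32 bit page table. The helper name is hypothetical; it
 * mirrors the logic of translate_host_pt_32() above.
 */
#if 0
static int example_walk_host_pt_32(pde32_t * pd, addr_t vaddr, addr_t * paddr) {
    addr_t entry = 0;

    switch (pde32_lookup(pd, vaddr, &entry)) {
        case PT_ENTRY_NOT_PRESENT:
            // Not mapped at the PDE level; entry was set to 0
            return -1;
        case PT_ENTRY_LARGE_PAGE:
            // entry already holds the byte granularity physical address
            *paddr = entry;
            return 0;
        case PT_ENTRY_PAGE:
            // entry holds the address of the page table page; do the second level lookup
            if (pte32_lookup((pte32_t *)entry, vaddr, paddr) != PT_ENTRY_PAGE) {
                return -1;
            }
            return 0;
    }
    return -1;
}
#endif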
/* 32 bit Page Table lookup functions */
pt_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
    pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);

    if (!pde_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else if (pde_entry->large_page) {
        pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;

        *entry = BASE_TO_PAGE_ADDR_4MB(large_pde->page_base_addr);
        *entry += PAGE_OFFSET_4MB(addr);

        return PT_ENTRY_LARGE_PAGE;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
        return PT_ENTRY_PAGE;
    }
}
/* Takes a virtual addr (addr) and returns the physical addr (entry) as defined in the page table
 */
pt_entry_type_t pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
    pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);

    if (!pte_entry->present) {
        *entry = 0;
        // PrintDebug("Lookup at non present page (index=%d)\n", PTE32_INDEX(addr));
        return PT_ENTRY_NOT_PRESENT;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
        return PT_ENTRY_PAGE;
    }
}
/* 32 bit PAE Page Table lookup functions */
pt_entry_type_t pdpe32pae_lookup(pdpe32pae_t * pdp, addr_t addr, addr_t * entry) {
    pdpe32pae_t * pdpe_entry = &(pdp[PDPE32PAE_INDEX(addr)]);

    if (!pdpe_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pdpe_entry->pd_base_addr);
        return PT_ENTRY_PAGE;
    }
}
pt_entry_type_t pde32pae_lookup(pde32pae_t * pd, addr_t addr, addr_t * entry) {
    pde32pae_t * pde_entry = &(pd[PDE32PAE_INDEX(addr)]);

    if (!pde_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else if (pde_entry->large_page) {
        pde32pae_2MB_t * large_pde = (pde32pae_2MB_t *)pde_entry;

        *entry = BASE_TO_PAGE_ADDR_2MB(large_pde->page_base_addr);
        *entry += PAGE_OFFSET_2MB(addr);

        return PT_ENTRY_LARGE_PAGE;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
        return PT_ENTRY_PAGE;
    }
}
pt_entry_type_t pte32pae_lookup(pte32pae_t * pt, addr_t addr, addr_t * entry) {
    pte32pae_t * pte_entry = &(pt[PTE32PAE_INDEX(addr)]);

    if (!pte_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
        return PT_ENTRY_PAGE;
    }
}
/* 64 bit Page Table lookup functions */
pt_entry_type_t pml4e64_lookup(pml4e64_t * pml, addr_t addr, addr_t * entry) {
    pml4e64_t * pml_entry = &(pml[PML4E64_INDEX(addr)]);

    if (!pml_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pml_entry->pdp_base_addr);
        return PT_ENTRY_PAGE;
    }
}
pt_entry_type_t pdpe64_lookup(pdpe64_t * pdp, addr_t addr, addr_t * entry) {
    pdpe64_t * pdpe_entry = &(pdp[PDPE64_INDEX(addr)]);

    if (!pdpe_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else if (pdpe_entry->large_page) {
        // 1GB pages are not handled; treat the lookup as a failure
        PrintError("1 Gigabyte pages not supported\n");
        return -1;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pdpe_entry->pd_base_addr);
        return PT_ENTRY_PAGE;
    }
}
pt_entry_type_t pde64_lookup(pde64_t * pd, addr_t addr, addr_t * entry) {
    pde64_t * pde_entry = &(pd[PDE64_INDEX(addr)]);

    if (!pde_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else if (pde_entry->large_page) {
        pde64_2MB_t * large_pde = (pde64_2MB_t *)pde_entry;

        *entry = BASE_TO_PAGE_ADDR_2MB(large_pde->page_base_addr);
        *entry += PAGE_OFFSET_2MB(addr);

        return PT_ENTRY_LARGE_PAGE;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
        return PT_ENTRY_PAGE;
    }
}
pt_entry_type_t pte64_lookup(pte64_t * pt, addr_t addr, addr_t * entry) {
    pte64_t * pte_entry = &(pt[PTE64_INDEX(addr)]);

    if (!pte_entry->present) {
        *entry = 0;
        return PT_ENTRY_NOT_PRESENT;
    } else {
        *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
        return PT_ENTRY_PAGE;
    }
}
pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
    pde32_t * entry = &pde[PDE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ACCESS_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_ACCESS_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        return PT_ACCESS_USER_ERROR;
    }

    return PT_ACCESS_OK;
}
pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
    pte32_t * entry = &pte[PTE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ACCESS_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_ACCESS_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        return PT_ACCESS_USER_ERROR;
    }

    return PT_ACCESS_OK;
}
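/* Illustrative only (not part of the original file): a sketch of how a shadow
 * page fault handler might classify a faulting guest access with the helpers
 * above. The function and variable names here are hypothetical.
 */
#if 0
static void example_classify_fault_32(pde32_t * guest_pd, addr_t fault_addr, pf_error_t error_code) {
    pt_access_status_t status = can_access_pde32(guest_pd, fault_addr, error_code);

    if (status == PT_ACCESS_NOT_PRESENT) {
        PrintDebug("Fault at %p: PDE not present\n", (void *)fault_addr);
    } else if (status == PT_ACCESS_WRITE_ERROR) {
        PrintDebug("Fault at %p: write to a read-only mapping\n", (void *)fault_addr);
    } else if (status == PT_ACCESS_USER_ERROR) {
        PrintDebug("Fault at %p: user access to a supervisor mapping\n", (void *)fault_addr);
    } else {
        // The PDE allows the access; the PTE level would be checked next with can_access_pte32()
        PrintDebug("Fault at %p: PDE level access is allowed\n", (void *)fault_addr);
    }
}
#endif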
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 * (A short usage sketch follows this function.)
 */
pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    struct shadow_map * map = &(guest_info->mem_map);

    pde32_t * pde = V3_VAddr(V3_AllocPages(1));

    for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
        pte32_t * pte = V3_VAddr(V3_AllocPages(1));

        for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
            struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                (region->host_type == HOST_REGION_HOOK) ||
                (region->host_type == HOST_REGION_UNALLOCATED) ||
                (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                (region->host_type == HOST_REGION_REMOTE) ||
                (region->host_type == HOST_REGION_SWAPPED)) {

                pte[j].user_page = 0;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].global_page = 0;
                pte[j].page_base_addr = 0;

                pte[j].user_page = 1;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].global_page = 0;

                if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {

                pte[j].page_base_addr = host_addr >> 12;

            current_page_addr += PAGE_SIZE;

        if (pte_present == 0) {
            V3_FreePage(V3_PAddr(pte));

            pde[i].user_page = 0;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].pt_base_addr = 0;

            pde[i].user_page = 1;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
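/* Illustrative only (not part of the original file): the returned pointer is a
 * host virtual address for the new page directory; the physical address that
 * would actually be loaded into a (shadow) CR3 register is recovered with
 * V3_PAddr(). The variable names below are hypothetical.
 */
#if 0
pde32_t * passthrough_pd = create_passthrough_pts_32(guest_info);
addr_t passthrough_pd_pa = (addr_t)V3_PAddr(passthrough_pd);
#endif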
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    struct shadow_map * map = &(guest_info->mem_map);

    pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
    memset(pdpe, 0, PAGE_SIZE);

    for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
        pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));

        for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
            pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));

            for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
                struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                    (region->host_type == HOST_REGION_HOOK) ||
                    (region->host_type == HOST_REGION_UNALLOCATED) ||
                    (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                    (region->host_type == HOST_REGION_REMOTE) ||
                    (region->host_type == HOST_REGION_SWAPPED)) {

                    pte[k].user_page = 0;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].global_page = 0;
                    pte[k].page_base_addr = 0;

                    pte[k].user_page = 1;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].global_page = 0;

                    if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {

                    pte[k].page_base_addr = host_addr >> 12;

                current_page_addr += PAGE_SIZE;

            if (pte_present == 0) {
                V3_FreePage(V3_PAddr(pte));

                pde[j].user_page = 0;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].pt_base_addr = 0;

                pde[j].user_page = 1;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));

        if (pde_present == 0) {
            V3_FreePage(V3_PAddr(pde));

            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = 0;

            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
    addr_t current_page_addr = 0;
    struct shadow_map * map = &(info->mem_map);

    pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));

    // Note: only the first PML4 entry is populated, covering the low 512 GB of the address space
    for (i = 0; i < 1; i++) {
        int pdpe_present = 0;
        pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));

        // Note: only 20 PDPE entries (1 GB each) are built, covering the first 20 GB of guest memory
        for (j = 0; j < 20; j++) {
            pde64_t * pde = V3_VAddr(V3_AllocPages(1));

            for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
                pte64_t * pte = V3_VAddr(V3_AllocPages(1));

                for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
                    struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                        (region->host_type == HOST_REGION_HOOK) ||
                        (region->host_type == HOST_REGION_UNALLOCATED) ||
                        (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                        (region->host_type == HOST_REGION_REMOTE) ||
                        (region->host_type == HOST_REGION_SWAPPED)) {

                        pte[m].user_page = 0;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].global_page = 0;
                        pte[m].page_base_addr = 0;

                        pte[m].user_page = 1;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].global_page = 0;

                        if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {

                        pte[m].page_base_addr = PAGE_BASE_ADDR(host_addr);

                        //PrintPTE64(current_page_addr, &(pte[m]));

                    current_page_addr += PAGE_SIZE;

                if (pte_present == 0) {
                    V3_FreePage(V3_PAddr(pte));

                    pde[k].user_page = 0;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].pt_base_addr = 0;

                    pde[k].user_page = 1;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));

            if (pde_present == 0) {
                V3_FreePage(V3_PAddr(pde));

                pdpe[j].writable = 0;
                pdpe[j].user_page = 0;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = 0;

                pdpe[j].writable = 1;
                pdpe[j].user_page = 1;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));

        PrintDebug("PML index=%d\n", i);

        if (pdpe_present == 0) {
            V3_FreePage(V3_PAddr(pdpe));

            pml[i].user_page = 0;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].pdp_base_addr = 0;

            pml[i].user_page = 1;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));