2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_paging.h>
22 #include <palacios/vmm.h>
24 #include <palacios/vm_guest_mem.h>
// Tears down a 32-bit non-PAE page-table hierarchy: for each page table
// reachable from the PD, frees every present 4KB backing page, then frees
// the PD page itself.
// NOTE(review): several interior lines (loop-variable declarations, a
// pde[i].present guard, the per-PT free, closing braces) are not visible in
// this view — comments below cover only the visible logic.
32 void delete_page_tables_pde32(pde32_t * pde) {
39   for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
41     // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
42       PrintDebug("PTE base addr %x \n", pde[i].pt_base_addr);
      // Recover the page table's address from the PDE's frame-number field.
43       pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));
46       for (j = 0; (j < MAX_PTE32_ENTRIES); j++) {
47 	if ((pte[j].present)) {
          // Free the 4KB data page backing each present mapping.
48 	  os_hooks->free_page((void *)(pte[j].page_base_addr << PAGE_POWER));
52       PrintDebug("Deleting PTE %d (%p)\n", i, pte);
57   PrintDebug("Deleting PDE (%p)\n", pde);
  // Finally release the page-directory page itself.
58   V3_FreePage(V3_PAddr(pde));
// Full 32-bit page-walk: translates vaddr to *paddr through pd.
// NOTE(review): the large-page success arm and the function tail (failure
// return, closing brace) are elided from this view.
65 int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr) {
67   pde32_entry_type_t pde_entry_type;
  // pde_entry receives either a PT base address or a large-page address,
  // depending on the returned entry type.
73   pde_entry_type = pde32_lookup(pd, vaddr, &pde_entry);
75   if (pde_entry_type == PDE32_ENTRY_PTE32) {
    // 4KB mapping: descend into the page table for the final translation.
76     return pte32_lookup((pte32_t *)pde_entry, vaddr, paddr);
77   } else if (pde_entry_type == PDE32_ENTRY_LARGE_PAGE) {
87 /* We can't do a full lookup because we don't know what context the page tables are in...
88 * The entry addresses could be pointing to either guest physical memory or host physical memory
89 * Instead we just return the entry address, and a flag to show if it points to a pte or a large page...
// Single-level PDE lookup. Via *entry returns either the full physical
// address inside a 4MB large page (base + offset) or the base address of
// the next-level page table; the return value says which. Context-free:
// *entry may be guest- or host-physical depending on the caller (see the
// comment above this function).
// NOTE(review): else-arms/closing braces are elided from this view.
91 pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
92   pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);
94   if (!pde_entry->present) {
96     return PDE32_ENTRY_NOT_PRESENT;
99   if (pde_entry->large_page) {
      // 4MB page: reinterpret the PDE using the large-page layout.
100     pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;
102     *entry = PDE32_4MB_T_ADDR(*large_pde);
      // Fold in the page offset so *entry is a complete physical address.
103     *entry += PD32_4MB_PAGE_OFFSET(addr);
104     return PDE32_ENTRY_LARGE_PAGE;
    // Otherwise *entry is the address of the referenced PTE page.
106     *entry = PDE32_T_ADDR(*pde_entry);
107     return PDE32_ENTRY_PTE32;
  // NOTE(review): appears unreachable given the branches above; kept as written.
110   return PDE32_ENTRY_NOT_PRESENT;
115 /* Takes a virtual addr (addr) and returns the physical addr (entry) as defined in the page table
// Final-level lookup: resolve addr through one PTE page. On success
// *entry = page frame base + page offset (see the comment above).
// NOTE(review): the not-present and success return statements are elided
// from this view.
117 int pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
118   pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);
120   if (!pte_entry->present) {
122     PrintDebug("Lookup at non present page (index=%d)\n", PTE32_INDEX(addr));
  // Present: compose the full physical address from frame + offset.
125   *entry = PTE32_T_ADDR(*pte_entry) + PT32_PAGE_OFFSET(addr);
// Checks whether an access (described by the page-fault error code bits in
// access_type) is permitted by the PDE covering addr. Returns the first
// violated condition.
// NOTE(review): the success-path return (presumably PT_ACCESS_OK) and the
// closing brace are elided from this view.
134 pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
135   pde32_t * entry = &pde[PDE32_INDEX(addr)];
137   if (entry->present == 0) {
138     return PT_ENTRY_NOT_PRESENT;
139   } else if ((entry->writable == 0) && (access_type.write == 1)) {
    // Write attempted through a read-only entry.
140     return PT_WRITE_ERROR;
141   } else if ((entry->user_page == 0) && (access_type.user == 1)) {
    // User-mode access to a supervisor-only entry.
143     return PT_USER_ERROR;
// PTE-level counterpart of can_access_pde32: validates the access against
// the PTE covering addr, returning the first violated condition.
// NOTE(review): the success-path return and closing brace are elided from
// this view.
150 pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
151   pte32_t * entry = &pte[PTE32_INDEX(addr)];
153   if (entry->present == 0) {
154     return PT_ENTRY_NOT_PRESENT;
155   } else if ((entry->writable == 0) && (access_type.write == 1)) {
    // Write attempted through a read-only entry.
156     return PT_WRITE_ERROR;
157   } else if ((entry->user_page == 0) && (access_type.user == 1)) {
    // User-mode access to a supervisor-only entry.
159     return PT_USER_ERROR;
168 /* We generate a page table to correspond to a given memory layout
169 * pulling pages from the mem_list when necessary
170 * If there are any gaps in the layout, we add them as unmapped pages
// Builds a 32-bit non-PAE passthrough page table from the guest's shadow
// memory map: regions with directly-addressable host backing get present
// identity-style 4KB mappings; hooked/unallocated/device/remote/swapped
// regions (and, per the header comment above, gaps) are left not-present.
// Returns the newly allocated page directory.
// NOTE(review): many interior lines (present/writable/accessed bit
// assignments, the region-found branch structure, error handling for the
// failed translation, closing braces, the final return) are elided from
// this view — comments cover only visible logic.
172 pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
173   addr_t current_page_addr = 0;
175   struct shadow_map * map = &(guest_info->mem_map);
177   pde32_t * pde = V3_VAddr(V3_AllocPages(1));
179   for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
      // One PT page per PDE slot; freed below if no PTE ends up present.
181     pte32_t * pte = V3_VAddr(V3_AllocPages(1));
184     for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
185       struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
      // These host-region types cannot be mapped straight through, so the
      // PTE is left not-present (fields zeroed below).
188 	  (region->host_type == HOST_REGION_HOOK) ||
189 	  (region->host_type == HOST_REGION_UNALLOCATED) ||
190 	  (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
191 	  (region->host_type == HOST_REGION_REMOTE) ||
192 	  (region->host_type == HOST_REGION_SWAPPED)) {
195 	pte[j].user_page = 0;
196 	pte[j].write_through = 0;
197 	pte[j].cache_disable = 0;
201 	pte[j].global_page = 0;
203 	pte[j].page_base_addr = 0;
      // Mappable region: fill in a present user-accessible PTE.
208 	pte[j].user_page = 1;
209 	pte[j].write_through = 0;
210 	pte[j].cache_disable = 0;
214 	pte[j].global_page = 0;
      // Translate guest-physical to host-physical for the direct mapping.
217 	if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
      // Frame number for a 4KB page (addr >> 12).
223 	pte[j].page_base_addr = host_addr >> 12;
228       current_page_addr += PAGE_SIZE;
    // No PTE in this 4MB span was present — drop the unused PT page and
    // leave the PDE not-present.
231     if (pte_present == 0) {
232       V3_FreePage(V3_PAddr(pte));
236       pde[i].user_page = 0;
237       pde[i].write_through = 0;
238       pde[i].cache_disable = 0;
241       pde[i].large_page = 0;
242       pde[i].global_page = 0;
244       pde[i].pt_base_addr = 0;
      // At least one PTE is present: point the PDE at the populated PT.
248       pde[i].user_page = 1;
249       pde[i].write_through = 0;
250       pde[i].cache_disable = 0;
253       pde[i].large_page = 0;
254       pde[i].global_page = 0;
256       pde[i].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
265 /* We generate a page table to correspond to a given memory layout
266 * pulling pages from the mem_list when necessary
267 * If there are any gaps in the layout, we add them as unmapped pages
// PAE (3-level) variant of create_passthrough_pts_32: builds a
// PDPE -> PDE -> PTE hierarchy from the guest's shadow memory map, leaving
// unmappable regions not-present and freeing any PT/PD page that ends up
// empty. Returns the newly allocated PDPT.
// NOTE(review): many interior lines (present/writable bit assignments,
// branch structure, error handling, closing braces, the final return) are
// elided from this view — comments cover only visible logic.
269 pdpe32pae_t * create_passthrough_pts_PAE32(struct guest_info * guest_info) {
270   addr_t current_page_addr = 0;
272   struct shadow_map * map = &(guest_info->mem_map);
274   pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
  // PDPT entries are sparse; zero the whole page up front.
275   memset(pdpe, 0, PAGE_SIZE);
277   for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
      // One PD page per PDPE slot; freed below if it stays empty.
279     pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));
281     for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
        // One PT page per PDE slot; freed below if it stays empty.
285       pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));
288       for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
289 	struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
        // These host-region types cannot be mapped straight through.
292 	    (region->host_type == HOST_REGION_HOOK) ||
293 	    (region->host_type == HOST_REGION_UNALLOCATED) ||
294 	    (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
295 	    (region->host_type == HOST_REGION_REMOTE) ||
296 	    (region->host_type == HOST_REGION_SWAPPED)) {
299 	  pte[k].user_page = 0;
300 	  pte[k].write_through = 0;
301 	  pte[k].cache_disable = 0;
305 	  pte[k].global_page = 0;
307 	  pte[k].page_base_addr = 0;
        // Mappable region: present user-accessible PTE.
313 	  pte[k].user_page = 1;
314 	  pte[k].write_through = 0;
315 	  pte[k].cache_disable = 0;
319 	  pte[k].global_page = 0;
        // Translate guest-physical to host-physical for the direct mapping.
322 	  if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
        // Frame number for a 4KB page (addr >> 12).
328 	  pte[k].page_base_addr = host_addr >> 12;
334 	current_page_addr += PAGE_SIZE;
      // Empty PT — free it and leave the PDE not-present.
337       if (pte_present == 0) {
338 	V3_FreePage(V3_PAddr(pte));
342 	pde[j].user_page = 0;
343 	pde[j].write_through = 0;
344 	pde[j].cache_disable = 0;
347 	pde[j].large_page = 0;
348 	pde[j].global_page = 0;
350 	pde[j].pt_base_addr = 0;
      // Populated PT — present PDE pointing at it.
355 	pde[j].user_page = 1;
356 	pde[j].write_through = 0;
357 	pde[j].cache_disable = 0;
360 	pde[j].large_page = 0;
361 	pde[j].global_page = 0;
363 	pde[j].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
    // Empty PD — free it and leave the PDPE not-present.
371     if (pde_present == 0) {
372       V3_FreePage(V3_PAddr(pde));
376       pdpe[i].write_through = 0;
377       pdpe[i].cache_disable = 0;
378       pdpe[i].accessed = 0;
381       pdpe[i].vmm_info = 0;
382       pdpe[i].pd_base_addr = 0;
      // Populated PD — present PDPE pointing at it. (PAE PDPEs have no
      // writable/user bits, hence the smaller field set here.)
387       pdpe[i].write_through = 0;
388       pdpe[i].cache_disable = 0;
389       pdpe[i].accessed = 0;
392       pdpe[i].vmm_info = 0;
393       pdpe[i].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
// Long-mode (4-level) variant: builds PML4 -> PDPE -> PDE -> PTE
// passthrough tables from the guest's shadow memory map. Empty PT/PD/PDPT
// pages are freed and their parent entries left not-present.
// NOTE(review): the outer loops run i < 1 and j < 1, so only the first
// PML4E and first PDPE are populated — this appears to cap coverage at the
// low portion of the guest physical space (1GB if the PDE/PTE tables have
// 512 entries) — TODO confirm against MAX_PDE64_ENTRIES/MAX_PTE64_ENTRIES.
// NOTE(review): many interior lines (bit assignments, branch structure,
// error handling, closing braces, the final return) are elided from this
// view — comments cover only visible logic.
408 pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
409   addr_t current_page_addr = 0;
411   struct shadow_map * map = &(info->mem_map);
413   pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));
  // Only the first PML4 slot is filled (see note above).
415   for (i = 0; i < 1; i++) {
416     int pdpe_present = 0;
417     pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));
    // Only the first PDPT slot is filled (see note above).
419     for (j = 0; j < 1; j++) {
421       pde64_t * pde = V3_VAddr(V3_AllocPages(1));
423       for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
        // One PT page per PDE slot; freed below if it stays empty.
425 	pte64_t * pte = V3_VAddr(V3_AllocPages(1));
428 	for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
429 	  struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);
          // These host-region types cannot be mapped straight through.
434 	      (region->host_type == HOST_REGION_HOOK) ||
435 	      (region->host_type == HOST_REGION_UNALLOCATED) ||
436 	      (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
437 	      (region->host_type == HOST_REGION_REMOTE) ||
438 	      (region->host_type == HOST_REGION_SWAPPED)) {
441 	    pte[m].user_page = 0;
442 	    pte[m].write_through = 0;
443 	    pte[m].cache_disable = 0;
447 	    pte[m].global_page = 0;
449 	    pte[m].page_base_addr = 0;
          // Mappable region: present user-accessible PTE.
454 	    pte[m].user_page = 1;
455 	    pte[m].write_through = 0;
456 	    pte[m].cache_disable = 0;
460 	    pte[m].global_page = 0;
          // Translate guest-physical to host-physical for the mapping.
463 	    if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
          // Uses the 64-bit frame-number macro rather than a raw shift.
469 	    pte[m].page_base_addr = PTE64_BASE_ADDR(host_addr);
471 	    //PrintPTE64(current_page_addr, &(pte[m]));
479 	  current_page_addr += PAGE_SIZE;
        // Empty PT — free it and leave the PDE not-present.
482 	if (pte_present == 0) {
483 	  V3_FreePage(V3_PAddr(pte));
487 	  pde[k].user_page = 0;
488 	  pde[k].write_through = 0;
489 	  pde[k].cache_disable = 0;
492 	  pde[k].large_page = 0;
493 	  //pde[k].global_page = 0;
495 	  pde[k].pt_base_addr = 0;
        // Populated PT — present PDE pointing at it.
499 	  pde[k].user_page = 1;
500 	  pde[k].write_through = 0;
501 	  pde[k].cache_disable = 0;
504 	  pde[k].large_page = 0;
505 	  //pde[k].global_page = 0;
507 	  pde[k].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
      // Empty PD — free it and leave the PDPE not-present.
513       if (pde_present == 0) {
514 	V3_FreePage(V3_PAddr(pde));
517 	pdpe[j].writable = 0;
518 	pdpe[j].user_page = 0;
519 	pdpe[j].write_through = 0;
520 	pdpe[j].cache_disable = 0;
521 	pdpe[j].accessed = 0;
522 	pdpe[j].reserved = 0;
523 	pdpe[j].large_page = 0;
524 	//pdpe[j].global_page = 0;
525 	pdpe[j].vmm_info = 0;
526 	pdpe[j].pd_base_addr = 0;
      // Populated PD — present PDPE pointing at it. (Long-mode PDPEs do
      // have writable/user bits, unlike 32-bit PAE.)
529 	pdpe[j].writable = 1;
530 	pdpe[j].user_page = 1;
531 	pdpe[j].write_through = 0;
532 	pdpe[j].cache_disable = 0;
533 	pdpe[j].accessed = 0;
534 	pdpe[j].reserved = 0;
535 	pdpe[j].large_page = 0;
536 	//pdpe[j].global_page = 0;
537 	pdpe[j].vmm_info = 0;
538 	pdpe[j].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
546     PrintDebug("PML index=%d\n", i);
    // Empty PDPT — free it and leave the PML4E not-present.
548     if (pdpe_present == 0) {
549       V3_FreePage(V3_PAddr(pdpe));
553       pml[i].user_page = 0;
554       pml[i].write_through = 0;
555       pml[i].cache_disable = 0;
558       //pml[i].large_page = 0;
559       //pml[i].global_page = 0;
561       pml[i].pdp_base_addr = 0;
      // Populated PDPT — present PML4E pointing at it.
565       pml[i].user_page = 1;
566       pml[i].write_through = 0;
567       pml[i].cache_disable = 0;
570       //pml[i].large_page = 0;
571       //pml[i].global_page = 0;
573       pml[i].pdp_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pdpe));
// Debug dump of one 32-bit PDE: the virtual address it covers, the PT it
// points to, and its flag bits.
// NOTE(review): the remaining format arguments and closing brace are elided
// from this view.
584 void PrintPDE32(addr_t virtual_address, pde32_t * pde)
586   PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
587 	     (void *)virtual_address,
588 	     (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
// Debug dump of one 32-bit PTE: virtual address covered, mapped frame, and
// flag bits.
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
602 void PrintPTE32(addr_t virtual_address, pte32_t * pte)
604   PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
605 	     (void *)virtual_address,
606 	     (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
// Debug dump of one long-mode PDE. The global_page slot in the format is
// hard-coded to 0 (the field is commented out elsewhere for PDE64).
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
624 void PrintPDE64(addr_t virtual_address, pde64_t * pde)
626   PrintDebug("PDE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
627 	     (void *)virtual_address,
628 	     (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
637 	     0,//pde->global_page,
// Debug dump of one long-mode PTE: virtual address covered, mapped frame,
// and flag bits.
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
642 void PrintPTE64(addr_t virtual_address, pte64_t * pte)
644   PrintDebug("PTE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
645 	     (void *)virtual_address,
646 	     (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
// Dumps every present entry of a 32-bit page directory via PrintPDE32.
// NOTE(review): loop-variable declaration and closing braces elided from
// this view.
664 void PrintPD32(pde32_t * pde)
668   PrintDebug("Page Directory at %p:\n", pde);
669   for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
670     if ( pde[i].present) {
      // Each PDE covers PAGE_SIZE * MAX_PTE32_ENTRIES bytes of VA space.
671       PrintPDE32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
// Dumps every present entry of a 32-bit page table via PrintPTE32,
// computing each entry's virtual address from starting_address.
// NOTE(review): loop-variable declaration and closing braces elided from
// this view.
676 void PrintPT32(addr_t starting_address, pte32_t * pte)
680   PrintDebug("Page Table at %p:\n", pte);
681   for (i = 0; (i < MAX_PTE32_ENTRIES) ; i++) {
682     if (pte[i].present) {
683       PrintPTE32(starting_address + (PAGE_SIZE * i), &(pte[i]));
// Recursively dumps an entire 32-bit page-table hierarchy: for each present
// PDE, prints the PDE and then its whole page table.
// NOTE(review): loop-variable declaration and closing braces elided from
// this view.
694 void PrintDebugPageTables(pde32_t * pde)
698   PrintDebug("Dumping the pages starting with the pde page at %p\n", pde);
700   for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
701     if (pde[i].present) {
702       PrintPDE32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
      // The PDE stores a physical frame number; convert to a host virtual
      // pointer before walking the PT.
703       PrintPT32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), (pte32_t *)V3_VAddr((void *)(addr_t)(pde[i].pt_base_addr << PAGE_POWER)));
// Debug dump of one PAE PDPE (note: fewer flag fields than a PDE).
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
715 void PrintPDPE32PAE(addr_t virtual_address, pdpe32pae_t * pdpe)
717   PrintDebug("PDPE %p -> %p : present=%x, wt=%x, cd=%x, accessed=%x, kernelInfo=%x\n",
718 	     (void *)virtual_address,
719 	     (void *)(addr_t) (pdpe->pd_base_addr << PAGE_POWER),
// Debug dump of one PAE PDE: virtual address covered, PT pointed to, and
// flag bits.
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
727 void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde)
729   PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
730 	     (void *)virtual_address,
731 	     (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER),
// Debug dump of one PAE PTE: virtual address covered, mapped frame, and
// flag bits.
// NOTE(review): remaining format arguments and closing brace elided from
// this view.
744 void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte)
746   PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
747 	     (void *)virtual_address,
748 	     (void*)(addr_t)(pte->page_base_addr << PAGE_POWER),
// Recursively dumps a full PAE hierarchy (PDPT -> PD -> PT), tracking the
// running virtual address and skipping it forward past non-present entries
// so each printed entry shows the VA it covers.
// NOTE(review): loop-variable/pointer declarations, else-arms, and closing
// braces are elided from this view — the virtual_addr += lines at the end
// appear to be the skip-forward adjustments for non-present PDEs/PDPEs.
766 void PrintDebugPageTables32PAE(pdpe32pae_t * pdpe)
771   addr_t virtual_addr = 0;
773   PrintDebug("Dumping the pages starting with the pde page at %p\n", pdpe);
775   for (i = 0; (i < MAX_PDPE32PAE_ENTRIES); i++) {
777     if (pdpe[i].present) {
      // Convert the PDPE's frame number to a host virtual pointer.
778       pde = (pde32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pdpe[i].pd_base_addr));
780       PrintPDPE32PAE(virtual_addr, &(pdpe[i]));
782       for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
784 	if (pde[j].present) {
          // Convert the PDE's frame number to a host virtual pointer.
785 	  pte = (pte32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pde[j].pt_base_addr));
787 	  PrintPDE32PAE(virtual_addr, &(pde[j]));
789 	  for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
790 	    if (pte[k].present) {
791 	      PrintPTE32PAE(virtual_addr, &(pte[k]));
          // Advance one 4KB page per PTE slot.
794 	    virtual_addr += PAGE_SIZE;
        // Skip the VA span of a non-present PDE.
797 	  virtual_addr += PAGE_SIZE * MAX_PTE32PAE_ENTRIES;
      // Skip the VA span of a non-present PDPE.
801       virtual_addr += PAGE_SIZE * MAX_PDE32PAE_ENTRIES * MAX_PTE32PAE_ENTRIES;