/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_paging.h>

#include <palacios/vmm.h>

#include <palacios/vm_guest_mem.h>
void delete_page_tables_32(pde32_t * pde) {
    int i;

    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
        if (pde[i].present) {
            // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
            PrintDebug("PTE base addr %x\n", pde[i].pt_base_addr);
            pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));

            PrintDebug("Deleting PTE %d (%p)\n", i, pte);
            // pt_base_addr << PAGE_POWER is already a physical address,
            // so it is handed to V3_FreePage directly
            V3_FreePage(pte);
        }
    }

    PrintDebug("Deleting PDE (%p)\n", pde);
    V3_FreePage(V3_PAddr(pde));
}
void delete_page_tables_32PAE(pdpe32pae_t * pdpe) {
    PrintError("Unimplemented function\n");
}

void delete_page_tables_64(pml4e64_t * pml4) {
    PrintError("Unimplemented function\n");
}
/* Walks both levels of a 32 bit page table and translates vaddr into the
 * corresponding physical address. This only works when the entry addresses
 * are directly usable as pointers (see the comment on pde32_lookup below).
 */
int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr) {
    addr_t pde_entry;
    pde32_entry_type_t pde_entry_type;

    pde_entry_type = pde32_lookup(pd, vaddr, &pde_entry);

    if (pde_entry_type == PDE32_ENTRY_PTE32) {
        return pte32_lookup((pte32_t *)pde_entry, vaddr, paddr);
    } else if (pde_entry_type == PDE32_ENTRY_LARGE_PAGE) {
        // pde32_lookup already resolved the full physical address
        *paddr = pde_entry;
        return 0;
    }

    return -1;
}
/* We can't do a full lookup because we don't know what context the page tables are in...
 * The entry addresses could be pointing to either guest physical memory or host physical memory.
 * Instead we just return the entry address, and a flag to show if it points to a pte or a large page...
 */
pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
    pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);

    if (!pde_entry->present) {
        *entry = 0;
        return PDE32_ENTRY_NOT_PRESENT;
    } else {
        if (pde_entry->large_page) {
            pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;

            // A 4MB page: the entry resolves all the way to a physical address
            *entry = PDE32_4MB_T_ADDR(*large_pde);
            *entry += PD32_4MB_PAGE_OFFSET(addr);
            return PDE32_ENTRY_LARGE_PAGE;
        } else {
            // The entry points to the next level page table
            *entry = PDE32_T_ADDR(*pde_entry);
            return PDE32_ENTRY_PTE32;
        }
    }
}
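/* Illustrative sketch (not in the original file): one way a caller can
 * consume pde32_lookup()'s (entry, type) result when the page tables hold
 * *guest* physical addresses. The context decision described in the comment
 * above is made here, by the caller, via guest_pa_to_host_va() from
 * vm_guest_mem.h. The example_* name is hypothetical.
 */
static inline int example_guest_pt32_walk(struct guest_info * info, pde32_t * guest_pd,
                                          addr_t vaddr, addr_t * guest_paddr) {
    addr_t pde_entry = 0;
    addr_t pt_host_va = 0;

    switch (pde32_lookup(guest_pd, vaddr, &pde_entry)) {
        case PDE32_ENTRY_LARGE_PAGE:
            // For a 4MB page the lookup already produced the full (guest) physical addr
            *guest_paddr = pde_entry;
            return 0;
        case PDE32_ENTRY_PTE32:
            // The entry names a guest physical page table page; map it into
            // the host before walking the second level
            if (guest_pa_to_host_va(info, pde_entry, &pt_host_va) == -1) {
                return -1;
            }
            return pte32_lookup((pte32_t *)pt_host_va, vaddr, guest_paddr);
        default:
            return -1;
    }
}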
/* Takes a virtual addr (addr) and returns the physical addr (entry) as defined in the page table
 */
int pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
    pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);

    if (!pte_entry->present) {
        *entry = 0;
        PrintDebug("Lookup at non present page (index=%d)\n", (int)PTE32_INDEX(addr));
        return -1;
    } else {
        *entry = PTE32_T_ADDR(*pte_entry) + PT32_PAGE_OFFSET(addr);
        return 0;
    }
}
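/* Illustrative sketch (not in the original file): a host-context translation
 * through pt32_lookup(), which assumes the entry addresses are directly
 * dereferenceable (e.g. the passthrough tables built further down in this
 * file). The example_* name is hypothetical.
 */
static inline void example_host_pt32_translate(pde32_t * pd, addr_t vaddr) {
    addr_t paddr = 0;

    if (pt32_lookup(pd, vaddr, &paddr) == -1) {
        PrintDebug("No mapping for vaddr %p\n", (void *)vaddr);
        PrintDebugPageTables(pd);    // dump the full hierarchy for debugging
    } else {
        PrintDebug("vaddr %p -> paddr %p\n", (void *)vaddr, (void *)paddr);
    }
}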
pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
    pde32_t * entry = &pde[PDE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        // A supervisor-only entry touched from user mode
        return PT_USER_ERROR;
    }

    return PT_SUCCESS;
}
pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
    pte32_t * entry = &pte[PTE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        // A supervisor-only page touched from user mode
        return PT_USER_ERROR;
    }

    return PT_SUCCESS;
}
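/* Illustrative sketch (not in the original file): combining the two access
 * checks the way a shadow page fault handler might, descending from the pde
 * to the pte only when the directory level allows the access. Comparing
 * against the error enumerators used above avoids assuming the name of the
 * success value. The example_* name is hypothetical.
 */
static inline pt_access_status_t example_can_access_32(pde32_t * pd, addr_t vaddr,
                                                       pf_error_t access_type) {
    pt_access_status_t pde_status = can_access_pde32(pd, vaddr, access_type);

    if ((pde_status == PT_ENTRY_NOT_PRESENT) ||
        (pde_status == PT_WRITE_ERROR) ||
        (pde_status == PT_USER_ERROR)) {
        return pde_status;
    }

    // The directory entry is usable; check the leaf level as well
    // (assumes a 4KB mapping, i.e. the pde's large_page bit is clear)
    {
        pte32_t * pt = (pte32_t *)V3_VAddr((void *)(addr_t)(pd[PDE32_INDEX(vaddr)].pt_base_addr << PAGE_POWER));
        return can_access_pte32(pt, vaddr, access_type);
    }
}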
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j;
    struct shadow_map * map = &(guest_info->mem_map);

    pde32_t * pde = V3_VAddr(V3_AllocPages(1));

    // Zero each table page so any bits not set explicitly below stay cleared
    memset(pde, 0, PAGE_SIZE);

    for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
        int pte_present = 0;
        pte32_t * pte = V3_VAddr(V3_AllocPages(1));

        memset(pte, 0, PAGE_SIZE);

        for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
            struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

            // Gaps in the layout, and regions we cannot map directly,
            // become unmapped (not present) entries
            if (!region ||
                (region->host_type == HOST_REGION_HOOK) ||
                (region->host_type == HOST_REGION_UNALLOCATED) ||
                (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                (region->host_type == HOST_REGION_REMOTE) ||
                (region->host_type == HOST_REGION_SWAPPED)) {
                pte[j].present = 0;
                pte[j].writable = 0;
                pte[j].user_page = 0;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].global_page = 0;
                pte[j].page_base_addr = 0;
            } else {
                addr_t host_addr;

                pte[j].present = 1;
                pte[j].writable = 1;
                pte[j].user_page = 1;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].global_page = 0;

                if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
                    PrintError("Could not translate guest address %p\n", (void *)current_page_addr);
                    return NULL;
                }

                pte[j].page_base_addr = host_addr >> 12;

                pte_present = 1;
            }

            current_page_addr += PAGE_SIZE;
        }

        if (pte_present == 0) {
            // No page in this 4MB region was mappable: drop the empty page table
            V3_FreePage(V3_PAddr(pte));

            pde[i].present = 0;
            pde[i].user_page = 0;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].pt_base_addr = 0;
        } else {
            pde[i].present = 1;
            pde[i].writable = 1;
            pde[i].user_page = 1;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
        }
    }

    return pde;
}
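/* Illustrative sketch (not in the original file): build/inspect/tear-down
 * pairing for the 32 bit passthrough tables. The builder and the helpers in
 * this file take the *virtual* pointer, while the value loaded into a
 * guest's CR3 would be the physical address, (addr_t)V3_PAddr(pde); how CR3
 * gets installed is VMM-specific and elided. The example_* name is
 * hypothetical.
 */
static inline int example_passthrough_32_lifecycle(struct guest_info * info) {
    pde32_t * pde = create_passthrough_pts_32(info);

    if (pde == NULL) {
        // a guest_pa_to_host_pa() failure inside the builder
        return -1;
    }

    PrintDebugPageTables(pde);     // sanity check the generated mappings

    delete_page_tables_32(pde);    // teardown path
    return 0;
}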
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j, k;
    struct shadow_map * map = &(guest_info->mem_map);

    pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
    memset(pdpe, 0, PAGE_SIZE);

    for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
        int pde_present = 0;
        pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));

        // Zeroed entries are not-present; only the needed bits are set below
        memset(pde, 0, PAGE_SIZE);

        for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
            int pte_present = 0;
            pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));

            memset(pte, 0, PAGE_SIZE);

            for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
                struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                if (!region ||
                    (region->host_type == HOST_REGION_HOOK) ||
                    (region->host_type == HOST_REGION_UNALLOCATED) ||
                    (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                    (region->host_type == HOST_REGION_REMOTE) ||
                    (region->host_type == HOST_REGION_SWAPPED)) {
                    pte[k].present = 0;
                    pte[k].writable = 0;
                    pte[k].user_page = 0;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].global_page = 0;
                    pte[k].page_base_addr = 0;
                } else {
                    addr_t host_addr;

                    pte[k].present = 1;
                    pte[k].writable = 1;
                    pte[k].user_page = 1;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].global_page = 0;

                    if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
                        PrintError("Could not translate guest address %p\n", (void *)current_page_addr);
                        return NULL;
                    }

                    pte[k].page_base_addr = host_addr >> 12;

                    pte_present = 1;
                }

                current_page_addr += PAGE_SIZE;
            }

            if (pte_present == 0) {
                // No usable pages in this 2MB region: drop the empty page table
                V3_FreePage(V3_PAddr(pte));

                pde[j].present = 0;
                pde[j].writable = 0;
                pde[j].user_page = 0;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].pt_base_addr = 0;
            } else {
                pde[j].present = 1;
                pde[j].writable = 1;
                pde[j].user_page = 1;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));

                pde_present = 1;
            }
        }

        if (pde_present == 0) {
            // The whole 1GB region was empty: drop the page directory too
            V3_FreePage(V3_PAddr(pde));

            pdpe[i].present = 0;
            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = 0;
        } else {
            // Note: PAE pdp entries have no writable/user bits
            pdpe[i].present = 1;
            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
        }
    }

    return pdpe;
}
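/* Illustrative sketch (not in the original file): the geometry the 32PAE
 * builder above walks. 4 pdpe entries x 512 pdes x 512 ptes x 4KB pages
 * covers the entire 4GB 32 bit physical address space.
 */
static inline unsigned long long example_pts_32PAE_coverage(void) {
    return (unsigned long long)MAX_PDPE32PAE_ENTRIES *
           MAX_PDE32PAE_ENTRIES * MAX_PTE32PAE_ENTRIES * PAGE_SIZE;    // == 4GB
}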
pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
    addr_t current_page_addr = 0;
    int i, j, k, m;
    struct shadow_map * map = &(info->mem_map);

    pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));
    memset(pml, 0, PAGE_SIZE);

    // Only the first pml4 entry is filled, and only 20 pdpe entries under
    // it, so these tables cover the low 20GB of guest physical memory
    for (i = 0; i < 1; i++) {
        int pdpe_present = 0;
        pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));

        memset(pdpe, 0, PAGE_SIZE);

        for (j = 0; j < 20; j++) {
            int pde_present = 0;
            pde64_t * pde = V3_VAddr(V3_AllocPages(1));

            memset(pde, 0, PAGE_SIZE);

            for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
                int pte_present = 0;
                pte64_t * pte = V3_VAddr(V3_AllocPages(1));

                memset(pte, 0, PAGE_SIZE);

                for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
                    struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                    if (!region ||
                        (region->host_type == HOST_REGION_HOOK) ||
                        (region->host_type == HOST_REGION_UNALLOCATED) ||
                        (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                        (region->host_type == HOST_REGION_REMOTE) ||
                        (region->host_type == HOST_REGION_SWAPPED)) {
                        pte[m].present = 0;
                        pte[m].writable = 0;
                        pte[m].user_page = 0;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].global_page = 0;
                        pte[m].page_base_addr = 0;
                    } else {
                        addr_t host_addr;

                        pte[m].present = 1;
                        pte[m].writable = 1;
                        pte[m].user_page = 1;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].global_page = 0;

                        if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
                            PrintError("Could not translate guest address %p\n", (void *)current_page_addr);
                            return NULL;
                        }

                        pte[m].page_base_addr = PTE64_BASE_ADDR(host_addr);

                        //PrintPTE64(current_page_addr, &(pte[m]));

                        pte_present = 1;
                    }

                    current_page_addr += PAGE_SIZE;
                }

                if (pte_present == 0) {
                    V3_FreePage(V3_PAddr(pte));

                    pde[k].present = 0;
                    pde[k].user_page = 0;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].pt_base_addr = 0;
                } else {
                    pde[k].present = 1;
                    pde[k].writable = 1;
                    pde[k].user_page = 1;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));

                    pde_present = 1;
                }
            }

            if (pde_present == 0) {
                V3_FreePage(V3_PAddr(pde));

                pdpe[j].present = 0;
                pdpe[j].writable = 0;
                pdpe[j].user_page = 0;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].reserved = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = 0;
            } else {
                pdpe[j].present = 1;
                pdpe[j].writable = 1;
                pdpe[j].user_page = 1;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].reserved = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));

                pdpe_present = 1;
            }
        }

        PrintDebug("PML index=%d\n", i);

        if (pdpe_present == 0) {
            V3_FreePage(V3_PAddr(pdpe));

            pml[i].present = 0;
            pml[i].user_page = 0;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].pdp_base_addr = 0;
        } else {
            pml[i].present = 1;
            pml[i].writable = 1;
            pml[i].user_page = 1;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].pdp_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pdpe));
        }
    }

    return pml;
}
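/* Illustrative sketch (not in the original file): the span the 64 bit
 * builder above actually maps. One pml4 entry and 20 pdpe entries give
 * 1 x 20 x 512 x 512 x 4KB = 20GB; guests with physical memory above that
 * bound would need the loop limits raised.
 */
static inline unsigned long long example_pts_64_coverage(void) {
    return 1ULL * 20 * MAX_PDE64_ENTRIES * MAX_PTE64_ENTRIES * PAGE_SIZE;    // == 20GB
}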
void PrintPDE32(addr_t virtual_address, pde32_t * pde)
{
    PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(pde->pt_base_addr << PAGE_POWER),
               pde->present,
               pde->writable,
               pde->user_page,
               pde->write_through,
               pde->cache_disable,
               pde->accessed,
               pde->reserved,
               pde->large_page,
               pde->global_page,
               pde->vmm_info);
}
void PrintPTE32(addr_t virtual_address, pte32_t * pte)
{
    PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(pte->page_base_addr << PAGE_POWER),
               pte->present,
               pte->writable,
               pte->user_page,
               pte->write_through,
               pte->cache_disable,
               pte->accessed,
               pte->dirty,
               pte->pte_attr,
               pte->global_page,
               pte->vmm_info);
}
void PrintPD32(pde32_t * pde)
{
    int i;

    PrintDebug("Page Directory at %p:\n", pde);
    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
        if (pde[i].present) {
            PrintPDE32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
        }
    }
}
void PrintPT32(addr_t starting_address, pte32_t * pte)
{
    int i;

    PrintDebug("Page Table at %p:\n", pte);
    for (i = 0; (i < MAX_PTE32_ENTRIES); i++) {
        if (pte[i].present) {
            PrintPTE32(starting_address + (PAGE_SIZE * i), &(pte[i]));
        }
    }
}
void PrintDebugPageTables(pde32_t * pde)
{
    int i;

    PrintDebug("Dumping the pages starting with the pde page at %p\n", pde);

    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
        if (pde[i].present) {
            PrintPDE32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), &(pde[i]));
            PrintPT32((addr_t)(PAGE_SIZE * MAX_PTE32_ENTRIES * i), (pte32_t *)V3_VAddr((void *)(addr_t)(pde[i].pt_base_addr << PAGE_POWER)));
        }
    }
}
void PrintPDPE32PAE(addr_t virtual_address, pdpe32pae_t * pdpe)
{
    PrintDebug("PDPE %p -> %p : present=%x, wt=%x, cd=%x, accessed=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(pdpe->pd_base_addr << PAGE_POWER),
               pdpe->present,
               pdpe->write_through,
               pdpe->cache_disable,
               pdpe->accessed,
               pdpe->vmm_info);
}
void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde)
{
    PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(pde->pt_base_addr << PAGE_POWER),
               pde->present,
               pde->writable,
               pde->user_page,
               pde->write_through,
               pde->cache_disable,
               pde->accessed,
               pde->large_page,
               pde->global_page,
               pde->vmm_info);
}
void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte)
{
    PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(pte->page_base_addr << PAGE_POWER),
               pte->present,
               pte->writable,
               pte->user_page,
               pte->write_through,
               pte->cache_disable,
               pte->accessed,
               pte->dirty,
               pte->pte_attr,
               pte->global_page,
               pte->vmm_info);
}
void PrintDebugPageTables32PAE(pdpe32pae_t * pdpe)
{
    int i, j, k;
    pde32pae_t * pde;
    pte32pae_t * pte;
    addr_t virtual_addr = 0;

    PrintDebug("Dumping the pages starting with the pde page at %p\n", pdpe);

    for (i = 0; (i < MAX_PDPE32PAE_ENTRIES); i++) {

        if (pdpe[i].present) {
            pde = (pde32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pdpe[i].pd_base_addr));

            PrintPDPE32PAE(virtual_addr, &(pdpe[i]));

            for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {

                if (pde[j].present) {
                    pte = (pte32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pde[j].pt_base_addr));

                    PrintPDE32PAE(virtual_addr, &(pde[j]));

                    for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
                        if (pte[k].present) {
                            PrintPTE32PAE(virtual_addr, &(pte[k]));
                        }

                        virtual_addr += PAGE_SIZE;
                    }
                } else {
                    virtual_addr += PAGE_SIZE * MAX_PTE32PAE_ENTRIES;
                }
            }
        } else {
            virtual_addr += PAGE_SIZE * MAX_PDE32PAE_ENTRIES * MAX_PTE32PAE_ENTRIES;
        }
    }
}
void PrintPML4e64(addr_t virtual_address, pml4e64_t * pml)
{
    PrintDebug("PML4e64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(BASE_TO_PAGE_ADDR(pml->pdp_base_addr)),
               pml->present,
               pml->writable,
               pml->user_page,
               pml->write_through,
               pml->cache_disable,
               pml->accessed,
               pml->reserved,
               pml->vmm_info);
}
void PrintPDPE64(addr_t virtual_address, pdpe64_t * pdpe)
{
    PrintDebug("PDPE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(BASE_TO_PAGE_ADDR(pdpe->pd_base_addr)),
               pdpe->present,
               pdpe->writable,
               pdpe->user_page,
               pdpe->write_through,
               pdpe->cache_disable,
               pdpe->accessed,
               pdpe->reserved,
               pdpe->large_page,
               0,//pdpe->global_page,
               pdpe->vmm_info);
}
void PrintPDE64(addr_t virtual_address, pde64_t * pde)
{
    PrintDebug("PDE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(BASE_TO_PAGE_ADDR(pde->pt_base_addr)),
               pde->present,
               pde->writable,
               pde->user_page,
               pde->write_through,
               pde->cache_disable,
               pde->accessed,
               pde->reserved,
               pde->large_page,
               0,//pde->global_page,
               pde->vmm_info);
}
void PrintPTE64(addr_t virtual_address, pte64_t * pte)
{
    PrintDebug("PTE64 %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
               (void *)virtual_address,
               (void *)(addr_t)(BASE_TO_PAGE_ADDR(pte->page_base_addr)),
               pte->present,
               pte->writable,
               pte->user_page,
               pte->write_through,
               pte->cache_disable,
               pte->accessed,
               pte->dirty,
               pte->pte_attr,
               pte->global_page,
               pte->vmm_info);
}
void PrintPageTree_64(addr_t virtual_addr, pml4e64_t * pml) {
    uint_t pml4_index = PML4E64_INDEX(virtual_addr);
    uint_t pdpe_index = PDPE64_INDEX(virtual_addr);
    uint_t pde_index = PDE64_INDEX(virtual_addr);
    uint_t pte_index = PTE64_INDEX(virtual_addr);

    PrintPML4e64(virtual_addr, &(pml[pml4_index]));

    if (pml[pml4_index].present) {
        pdpe64_t * pdpe = (pdpe64_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pml[pml4_index].pdp_base_addr));
        PrintPDPE64(virtual_addr, &(pdpe[pdpe_index]));

        if (pdpe[pdpe_index].present) {
            pde64_t * pde = (pde64_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
            PrintPDE64(virtual_addr, &(pde[pde_index]));

            if (pde[pde_index].present) {
                pte64_t * pte = (pte64_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
                PrintPTE64(virtual_addr, &(pte[pte_index]));
            }
        }
    }
}
void PrintPageTree(v3_vm_cpu_mode_t cpu_mode, addr_t virtual_addr, addr_t cr3) {
    switch (cpu_mode) {
        case LONG:
            PrintPageTree_64(virtual_addr, CR3_TO_PML4E64_VA(cr3));
            break;
        default:
            PrintError("Unsupported CPU MODE %d\n", cpu_mode);
            break;
    }
}
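/* Illustrative sketch (not in the original file): dumping the mapping chain
 * for a faulting guest address from a long mode page fault path. LONG is
 * the long mode enumerator of v3_vm_cpu_mode_t, and cr3 is whichever root
 * (guest or shadow) the caller wants walked. The example_* name is
 * hypothetical.
 */
static inline void example_dump_fault_walk(addr_t fault_addr, addr_t cr3) {
    PrintPageTree(LONG, fault_addr, cr3);
}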