/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_paging.h>

#include <palacios/vmm.h>

#include <palacios/vm_guest_mem.h>


#define USE_VMM_PAGING_DEBUG
// All of the debug functions defined in vmm_paging.h are implemented in this file
#include "vmm_paging_debug.h"
#undef USE_VMM_PAGING_DEBUG
void delete_page_tables_32(pde32_t * pde) {
    int i;

    if (pde == NULL) {
        return;
    }

    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
        if (pde[i].present) {
            // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
            PrintDebug("PTE base addr %x\n", pde[i].pt_base_addr);
            pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));

            PrintDebug("Deleting PTE %d (%p)\n", i, pte);
            V3_FreePage(pte);
        }
    }

    PrintDebug("Deleting PDE (%p)\n", pde);
    V3_FreePage(V3_PAddr(pde));
}
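/* Worked example of the address reconstruction above (illustrative numbers):
 * a PDE with pt_base_addr = 0x12345 names the page table page at physical
 * address 0x12345 << PAGE_POWER = 0x12345000. Casting through addr_t rather
 * than straight from the 20 bit bitfield to a pointer keeps the conversion
 * well defined when Palacios itself is built for a 64 bit host.
 */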
void delete_page_tables_32PAE(pdpe32pae_t * pdpe) {
    PrintError("Unimplemented function\n");
}

void delete_page_tables_64(pml4e64_t * pml4) {
    PrintError("Unimplemented function\n");
}
int translate_guest_pt_32(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
    addr_t guest_pde_pa = CR3_TO_PDE32_PA((void *)guest_cr3);
    pde32_t * guest_pde = 0;
    addr_t guest_pte_pa = 0;

    if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t *)&guest_pde) == -1) {
        PrintError("Could not get virtual address of Guest PDE32 (PA=%p)\n",
                   (void *)guest_pde_pa);
        return -1;
    }

    switch (pde32_lookup(guest_pde, vaddr, &guest_pte_pa)) {
        case PDE32_ENTRY_NOT_PRESENT:
            *paddr = 0;
            return -1;
        case PDE32_ENTRY_LARGE_PAGE:
            // For a 4MB page, pde32_lookup already produced the full translation
            *paddr = guest_pte_pa;
            return 0;
        case PDE32_ENTRY_PTE32: {
            pte32_t * guest_pte = NULL;

            if (guest_pa_to_host_va(info, guest_pte_pa, (addr_t *)&guest_pte) == -1) {
                PrintError("Could not get virtual address of Guest PTE32 (PA=%p)\n",
                           (void *)guest_pte_pa);
                return -1;
            }

            if (pte32_lookup(guest_pte, vaddr, paddr) == -1) {
                return -1;
            }
        }
    }

    return 0;
}
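/* Usage sketch (illustrative, not part of this file): translating a faulting
 * guest virtual address under shadow paging. "guest_cr3" and "fault_addr"
 * are hypothetical locals supplied by the caller's fault handler.
 *
 *   addr_t guest_pa = 0;
 *   if (translate_guest_pt_32(info, guest_cr3, fault_addr, &guest_pa) == -1) {
 *       // fault_addr is unmapped in the guest's own page tables, so the
 *       // #PF belongs to the guest and should be injected back into it
 *   }
 */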
int translate_host_pt_32(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA((void *)host_cr3);
    pte32_t * host_pte = 0;

    switch (pde32_lookup(host_pde, vaddr, (addr_t *)&host_pte)) {
        case PDE32_ENTRY_NOT_PRESENT:
            *paddr = 0;
            return -1;
        case PDE32_ENTRY_LARGE_PAGE:
            *paddr = (addr_t)host_pte;
            return 0;
        case PDE32_ENTRY_PTE32:
            if (pte32_lookup(host_pte, vaddr, paddr) == -1) {
                return -1;
            }
    }

    return 0;
}
// NOTE: despite its name, this currently performs the same non-PAE 32 bit
// walk as translate_host_pt_32; a true PAE walk (PDPT plus 64 bit PDEs/PTEs)
// has not been implemented yet.
int translate_host_pt_32pae(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA((void *)host_cr3);
    pte32_t * host_pte = 0;

    switch (pde32_lookup(host_pde, vaddr, (addr_t *)&host_pte)) {
        case PDE32_ENTRY_NOT_PRESENT:
            *paddr = 0;
            return -1;
        case PDE32_ENTRY_LARGE_PAGE:
            *paddr = (addr_t)host_pte;
            return 0;
        case PDE32_ENTRY_PTE32:
            if (pte32_lookup(host_pte, vaddr, paddr) == -1) {
                return -1;
            }
    }

    return 0;
}
// NOTE: this also reuses the non-PAE 32 bit walk as a placeholder; the real
// 4-level (PML4/PDP/PD/PT) long mode walk has not been implemented yet.
int translate_host_pt_64(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA((void *)host_cr3);
    pte32_t * host_pte = 0;

    switch (pde32_lookup(host_pde, vaddr, (addr_t *)&host_pte)) {
        case PDE32_ENTRY_NOT_PRESENT:
            *paddr = 0;
            return -1;
        case PDE32_ENTRY_LARGE_PAGE:
            *paddr = (addr_t)host_pte;
            return 0;
        case PDE32_ENTRY_PTE32:
            if (pte32_lookup(host_pte, vaddr, paddr) == -1) {
                return -1;
            }
    }

    return 0;
}
int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr) {
    addr_t pde_entry = 0;
    pde32_entry_type_t pde_entry_type;

    if (pd == 0) {
        return -1;
    }

    pde_entry_type = pde32_lookup(pd, vaddr, &pde_entry);

    if (pde_entry_type == PDE32_ENTRY_PTE32) {
        return pte32_lookup((pte32_t *)pde_entry, vaddr, paddr);
    } else if (pde_entry_type == PDE32_ENTRY_LARGE_PAGE) {
        *paddr = pde_entry;
        return 0;
    }

    return -1;
}
/* We can't do a full lookup because we don't know what context the page tables are in:
 * the entry addresses could be pointing to either guest physical memory or host physical memory.
 * Instead we just return the entry address, and a flag showing whether it points to a pte or a large page.
 */
/* The value of *entry depends on the return type:
 * Page not present: *entry = 0
 * Large Page: *entry = translated physical address (byte granularity)
 * PTE entry: *entry = physical address of the PTE page
 */
pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
    pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);

    if (!pde_entry->present) {
        *entry = 0;
        return PDE32_ENTRY_NOT_PRESENT;
    }

    if (pde_entry->large_page) {
        pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;

        *entry = BASE_TO_PAGE_ADDR_4MB(large_pde->page_base_addr);
        *entry += PAGE_OFFSET_4MB(addr);
        return PDE32_ENTRY_LARGE_PAGE;
    }

    *entry = BASE_TO_PAGE_ADDR(pde_entry->pt_base_addr);
    return PDE32_ENTRY_PTE32;
}
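/* Worked example for the large page branch (illustrative numbers): for
 * addr = 0x00DEADBE mapped by a 4MB PDE whose frame sits at physical
 * 0x08000000, PAGE_OFFSET_4MB(addr) = addr & 0x3FFFFF = 0x1EADBE, so
 * *entry = 0x08000000 + 0x1EADBE = 0x081EADBE and the caller needs no
 * further PTE lookup.
 */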
/* Takes a virtual addr (addr) and returns the physical addr (*entry) as defined in the page table.
 * Returns 0 on success, -1 if the page is not present.
 */
int pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
    pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);

    if (!pte_entry->present) {
        *entry = 0;
        // PrintDebug("Lookup at non present page (index=%d)\n", PTE32_INDEX(addr));
        return -1;
    }

    *entry = BASE_TO_PAGE_ADDR(pte_entry->page_base_addr) + PAGE_OFFSET(addr);
    return 0;
}
int pdpe32pae_lookup(pdpe32pae_t * pdp, addr_t addr, addr_t * entry) {
    pdpe32pae_t * pdpe_entry = &(pdp[PDPE32PAE_INDEX(addr)]);

    if (!pdpe_entry->present) {
        *entry = 0;
        return -1;
    }

    *entry = BASE_TO_PAGE_ADDR(pdpe_entry->pd_base_addr) + PAGE_OFFSET(addr);
    return 0;
}
pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
    pde32_t * entry = &pde[PDE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        return PT_USER_ERROR;
    }

    return PT_ACCESS_OK;
}

pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
    pte32_t * entry = &pte[PTE32_INDEX(addr)];

    if (entry->present == 0) {
        return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
        return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
        return PT_USER_ERROR;
    }

    return PT_ACCESS_OK;
}
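/* Usage sketch (hypothetical shadow fault path, not part of this file):
 * "error_code" is the pf_error_t delivered with the guest page fault, and
 * PT_ACCESS_OK is assumed to be the success value of pt_access_status_t
 * from vmm_paging.h.
 *
 *   switch (can_access_pte32(guest_pt, fault_addr, error_code)) {
 *       case PT_ENTRY_NOT_PRESENT: // guest never mapped the page
 *           break;                 // -> inject #PF into the guest
 *       case PT_WRITE_ERROR:       // write to a read-only mapping
 *           break;
 *       case PT_USER_ERROR:        // user access to a supervisor page
 *           break;
 *       default:                   // PT_ACCESS_OK: the guest mapping is
 *           break;                 // fine; the fault came from the shadow
 *   }
 */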
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j;
    struct shadow_map * map = &(guest_info->mem_map);

    pde32_t * pde = V3_VAddr(V3_AllocPages(1));

    for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
        int pte_present = 0;
        pte32_t * pte = V3_VAddr(V3_AllocPages(1));

        for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
            struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

            if (!region ||
                (region->host_type == HOST_REGION_HOOK) ||
                (region->host_type == HOST_REGION_UNALLOCATED) ||
                (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                (region->host_type == HOST_REGION_REMOTE) ||
                (region->host_type == HOST_REGION_SWAPPED)) {
                pte[j].present = 0;
                pte[j].writable = 0;
                pte[j].user_page = 0;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].accessed = 0;
                pte[j].dirty = 0;
                pte[j].pte_attr = 0;
                pte[j].global_page = 0;
                pte[j].vmm_info = 0;
                pte[j].page_base_addr = 0;
            } else {
                addr_t host_addr = 0;

                pte[j].present = 1;
                pte[j].writable = 1;
                pte[j].user_page = 1;
                pte[j].write_through = 0;
                pte[j].cache_disable = 0;
                pte[j].accessed = 0;
                pte[j].dirty = 0;
                pte[j].pte_attr = 0;
                pte[j].global_page = 0;
                pte[j].vmm_info = 0;

                if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
                    // No host backing for this guest physical page
                    return NULL;
                }

                pte[j].page_base_addr = host_addr >> 12;
                pte_present = 1;
            }

            current_page_addr += PAGE_SIZE;
        }

        if (pte_present == 0) {
            V3_FreePage(V3_PAddr(pte));

            pde[i].present = 0;
            pde[i].writable = 0;
            pde[i].user_page = 0;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].accessed = 0;
            pde[i].reserved = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].vmm_info = 0;
            pde[i].pt_base_addr = 0;
        } else {
            pde[i].present = 1;
            pde[i].writable = 1;
            pde[i].user_page = 1;
            pde[i].write_through = 0;
            pde[i].cache_disable = 0;
            pde[i].accessed = 0;
            pde[i].reserved = 0;
            pde[i].large_page = 0;
            pde[i].global_page = 0;
            pde[i].vmm_info = 0;
            pde[i].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));
        }
    }

    return pde;
}
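/* Coverage check for the loop bounds above: MAX_PDE32_ENTRIES (1024)
 * directory entries x MAX_PTE32_ENTRIES (1024) table entries x 4KB pages
 * = 2^32 bytes, so the walk visits the full 4GB guest physical address
 * space exactly once.
 */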
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j, k;
    struct shadow_map * map = &(guest_info->mem_map);

    pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
    memset(pdpe, 0, PAGE_SIZE);

    for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
        int pde_present = 0;
        pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));
        memset(pde, 0, PAGE_SIZE);

        for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
            int pte_present = 0;
            pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));
            memset(pte, 0, PAGE_SIZE);

            for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
                struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                if (!region ||
                    (region->host_type == HOST_REGION_HOOK) ||
                    (region->host_type == HOST_REGION_UNALLOCATED) ||
                    (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                    (region->host_type == HOST_REGION_REMOTE) ||
                    (region->host_type == HOST_REGION_SWAPPED)) {
                    pte[k].present = 0;
                    pte[k].writable = 0;
                    pte[k].user_page = 0;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].accessed = 0;
                    pte[k].dirty = 0;
                    pte[k].global_page = 0;
                    pte[k].vmm_info = 0;
                    pte[k].page_base_addr = 0;
                } else {
                    addr_t host_addr = 0;

                    pte[k].present = 1;
                    pte[k].writable = 1;
                    pte[k].user_page = 1;
                    pte[k].write_through = 0;
                    pte[k].cache_disable = 0;
                    pte[k].accessed = 0;
                    pte[k].dirty = 0;
                    pte[k].global_page = 0;
                    pte[k].vmm_info = 0;

                    if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
                        // No host backing for this guest physical page
                        return NULL;
                    }

                    pte[k].page_base_addr = host_addr >> 12;
                    pte_present = 1;
                }

                current_page_addr += PAGE_SIZE;
            }

            if (pte_present == 0) {
                V3_FreePage(V3_PAddr(pte));

                pde[j].present = 0;
                pde[j].writable = 0;
                pde[j].user_page = 0;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].accessed = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].vmm_info = 0;
                pde[j].pt_base_addr = 0;
            } else {
                pde[j].present = 1;
                pde[j].writable = 1;
                pde[j].user_page = 1;
                pde[j].write_through = 0;
                pde[j].cache_disable = 0;
                pde[j].accessed = 0;
                pde[j].large_page = 0;
                pde[j].global_page = 0;
                pde[j].vmm_info = 0;
                pde[j].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));

                pde_present = 1;
            }
        }

        if (pde_present == 0) {
            V3_FreePage(V3_PAddr(pde));

            pdpe[i].present = 0;
            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = 0;
        } else {
            pdpe[i].present = 1;
            pdpe[i].write_through = 0;
            pdpe[i].cache_disable = 0;
            pdpe[i].accessed = 0;
            pdpe[i].vmm_info = 0;
            pdpe[i].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));
        }
    }

    return pdpe;
}
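/* Coverage check: MAX_PDPE32PAE_ENTRIES (4) x MAX_PDE32PAE_ENTRIES (512)
 * x MAX_PTE32PAE_ENTRIES (512) x 4KB = 4GB again, the same span as the
 * non-PAE version; each level holds 512 rather than 1024 entries because
 * PAE entries are 8 bytes wide.
 */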
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
    addr_t current_page_addr = 0;
    int i, j, k, m;
    struct shadow_map * map = &(info->mem_map);

    pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));
    memset(pml, 0, PAGE_SIZE);

    // Only the first PML4 entry is populated (one entry spans 512GB)
    for (i = 0; i < 1; i++) {
        int pdpe_present = 0;
        pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));
        memset(pdpe, 0, PAGE_SIZE);

        // Only the first 20 PDPE entries are populated (1GB each, 20GB total)
        for (j = 0; j < 20; j++) {
            int pde_present = 0;
            pde64_t * pde = V3_VAddr(V3_AllocPages(1));
            memset(pde, 0, PAGE_SIZE);

            for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
                int pte_present = 0;
                pte64_t * pte = V3_VAddr(V3_AllocPages(1));
                memset(pte, 0, PAGE_SIZE);

                for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
                    struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

                    if (!region ||
                        (region->host_type == HOST_REGION_HOOK) ||
                        (region->host_type == HOST_REGION_UNALLOCATED) ||
                        (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
                        (region->host_type == HOST_REGION_REMOTE) ||
                        (region->host_type == HOST_REGION_SWAPPED)) {
                        pte[m].present = 0;
                        pte[m].writable = 0;
                        pte[m].user_page = 0;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].accessed = 0;
                        pte[m].dirty = 0;
                        pte[m].global_page = 0;
                        pte[m].vmm_info = 0;
                        pte[m].page_base_addr = 0;
                    } else {
                        addr_t host_addr = 0;

                        pte[m].present = 1;
                        pte[m].writable = 1;
                        pte[m].user_page = 1;
                        pte[m].write_through = 0;
                        pte[m].cache_disable = 0;
                        pte[m].accessed = 0;
                        pte[m].dirty = 0;
                        pte[m].global_page = 0;
                        pte[m].vmm_info = 0;

                        if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
                            // No host backing for this guest physical page
                            return NULL;
                        }

                        pte[m].page_base_addr = PAGE_BASE_ADDR(host_addr);

                        //PrintPTE64(current_page_addr, &(pte[m]));

                        pte_present = 1;
                    }

                    current_page_addr += PAGE_SIZE;
                }

                if (pte_present == 0) {
                    V3_FreePage(V3_PAddr(pte));

                    pde[k].present = 0;
                    pde[k].writable = 0;
                    pde[k].user_page = 0;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].accessed = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].vmm_info = 0;
                    pde[k].pt_base_addr = 0;
                } else {
                    pde[k].present = 1;
                    pde[k].writable = 1;
                    pde[k].user_page = 1;
                    pde[k].write_through = 0;
                    pde[k].cache_disable = 0;
                    pde[k].accessed = 0;
                    pde[k].large_page = 0;
                    //pde[k].global_page = 0;
                    pde[k].vmm_info = 0;
                    pde[k].pt_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pte));

                    pde_present = 1;
                }
            }

            if (pde_present == 0) {
                V3_FreePage(V3_PAddr(pde));

                pdpe[j].present = 0;
                pdpe[j].writable = 0;
                pdpe[j].user_page = 0;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].reserved = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = 0;
            } else {
                pdpe[j].present = 1;
                pdpe[j].writable = 1;
                pdpe[j].user_page = 1;
                pdpe[j].write_through = 0;
                pdpe[j].cache_disable = 0;
                pdpe[j].accessed = 0;
                pdpe[j].reserved = 0;
                pdpe[j].large_page = 0;
                //pdpe[j].global_page = 0;
                pdpe[j].vmm_info = 0;
                pdpe[j].pd_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pde));

                pdpe_present = 1;
            }
        }

        PrintDebug("PML index=%d\n", i);

        if (pdpe_present == 0) {
            V3_FreePage(V3_PAddr(pdpe));

            pml[i].present = 0;
            pml[i].writable = 0;
            pml[i].user_page = 0;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            pml[i].accessed = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].vmm_info = 0;
            pml[i].pdp_base_addr = 0;
        } else {
            pml[i].present = 1;
            pml[i].writable = 1;
            pml[i].user_page = 1;
            pml[i].write_through = 0;
            pml[i].cache_disable = 0;
            pml[i].accessed = 0;
            //pml[i].large_page = 0;
            //pml[i].global_page = 0;
            pml[i].vmm_info = 0;
            pml[i].pdp_base_addr = PAGE_BASE_ADDR((addr_t)V3_PAddr(pdpe));
        }
    }

    return pml;
}
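/* Coverage check for the hard-coded bounds above: 1 PML4 entry x 20 PDPEs
 * x 512 PDEs x 512 PTEs x 4KB pages = 20GB of guest physical space,
 * matching the i < 1 and j < 20 loop limits.
 */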