/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_paging.h>

#include <palacios/vmm.h>

#include <palacios/vm_guest_mem.h>

#define USE_VMM_PAGING_DEBUG
// All of the debug functions defined in vmm_paging.h are implemented in this file
#include "vmm_paging_debug.h"
#undef USE_VMM_PAGING_DEBUG

void delete_page_tables_32(pde32_t * pde) {
    int i;

    if (pde == NULL) {
	return;
    }

    for (i = 0; (i < MAX_PDE32_ENTRIES); i++) {
	if (pde[i].present) {
	    // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer
	    PrintDebug("PTE base addr %x \n", pde[i].pt_base_addr);
	    pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER));

	    PrintDebug("Deleting PTE %d (%p)\n", i, pte);
	    V3_FreePage(pte);
	}
    }

    PrintDebug("Deleting PDE (%p)\n", pde);
    V3_FreePage(V3_PAddr(pde));
}

void delete_page_tables_32PAE(pdpe32pae_t * pdpe) {
    PrintError("Unimplemented function\n");
}

void delete_page_tables_64(pml4e64_t * pml4) {
    PrintError("Unimplemented function\n");
}

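/* Address translation helpers.
 * For a guest walk, the page directory address taken from guest CR3 is a
 * guest physical address and must first be converted to a host virtual
 * address (via guest_pa_to_host_va) before it can be dereferenced. For a
 * host walk the tables are directly addressable.
 */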
int translate_guest_pt_32(struct guest_info * info, addr_t guest_cr3, addr_t vaddr, addr_t * paddr) {
    addr_t guest_pde_pa = CR3_TO_PDE32_PA((void *)guest_cr3);
    pde32_t * guest_pde = 0;

    if (guest_pa_to_host_va(info, guest_pde_pa, (addr_t *)&guest_pde) == -1) {
	PrintError("In GVA->GPA: Invalid GPA(%p)->HVA PDE32 lookup\n",
		   (void *)guest_pde_pa);
	return -1;
    }

    return pt32_lookup(guest_pde, vaddr, paddr);
}


int translate_host_pt_32(addr_t host_cr3, addr_t vaddr, addr_t * paddr) {
    // Host page tables live in host memory, so the directory can be walked directly
    pde32_t * host_pde = (pde32_t *)CR3_TO_PDE32_VA((void *)host_cr3);

    return pt32_lookup(host_pde, vaddr, paddr);
}

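/* Usage sketch (illustrative only; fault_addr and guest_cr3 are hypothetical
 * names for values taken from the fault context):
 *
 *   addr_t guest_pa = 0;
 *   if (translate_guest_pt_32(info, guest_cr3, fault_addr, &guest_pa) == -1) {
 *       // No guest mapping exists for fault_addr; the fault belongs to the guest
 *   }
 */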
int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr) {
    addr_t pde_entry;
    pde32_entry_type_t pde_entry_type;

    if (pd == 0) {
	return -1;
    }

    pde_entry_type = pde32_lookup(pd, vaddr, &pde_entry);

    if (pde_entry_type == PDE32_ENTRY_PTE32) {
	return pte32_lookup((pte32_t *)pde_entry, vaddr, paddr);
    } else if (pde_entry_type == PDE32_ENTRY_LARGE_PAGE) {
	// For a large page, pde32_lookup already produced the full physical address
	*paddr = pde_entry;
	return 0;
    }

    return -1;
}

/* We can't do a full lookup because we don't know what context the page tables are in...
 * The entry addresses could be pointing to either guest physical memory or host physical memory.
 * Instead we just return the entry address, and a flag to show if it points to a pte or a large page...
 */
pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry) {
    pde32_t * pde_entry = &(pd[PDE32_INDEX(addr)]);

    if (!pde_entry->present) {
	*entry = 0;
	return PDE32_ENTRY_NOT_PRESENT;
    } else {
	if (pde_entry->large_page) {
	    pde32_4MB_t * large_pde = (pde32_4MB_t *)pde_entry;

	    // A 4MB page has no page table level, so resolve the full address here
	    *entry = PDE32_4MB_T_ADDR(*large_pde);
	    *entry += PD32_4MB_PAGE_OFFSET(addr);
	    return PDE32_ENTRY_LARGE_PAGE;
	} else {
	    *entry = PDE32_T_ADDR(*pde_entry);
	    return PDE32_ENTRY_PTE32;
	}
    }

    // Not reached
    return PDE32_ENTRY_NOT_PRESENT;
}

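/* Note the contract above: for PDE32_ENTRY_PTE32 the returned address is the
 * base of the next-level page table and the caller must finish the walk with
 * pte32_lookup(); for PDE32_ENTRY_LARGE_PAGE it is already the final
 * physical address (base + offset). */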
/* Takes a virtual addr (addr) and returns the physical addr (entry) as defined in the page table
 */
int pte32_lookup(pte32_t * pt, addr_t addr, addr_t * entry) {
    pte32_t * pte_entry = &(pt[PTE32_INDEX(addr)]);

    if (!pte_entry->present) {
	*entry = 0;
	PrintDebug("Lookup at non present page (index=%d)\n", PTE32_INDEX(addr));
	return -1;
    } else {
	*entry = PTE32_T_ADDR(*pte_entry) + PT32_PAGE_OFFSET(addr);
	return 0;
    }
}

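/* Access checks for the fault handling path: given a faulting address and a
 * page fault error code, decide whether this entry's protections would
 * themselves have caused the fault.
 */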
pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type) {
    pde32_t * entry = &pde[PDE32_INDEX(addr)];

    if (entry->present == 0) {
	return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
	return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
	// The page is a system page and the access was made in user mode
	return PT_USER_ERROR;
    }

    return PT_ACCESS_OK;
}

pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type) {
    pte32_t * entry = &pte[PTE32_INDEX(addr)];

    if (entry->present == 0) {
	return PT_ENTRY_NOT_PRESENT;
    } else if ((entry->writable == 0) && (access_type.write == 1)) {
	return PT_WRITE_ERROR;
    } else if ((entry->user_page == 0) && (access_type.user == 1)) {
	// The page is a system page and the access was made in user mode
	return PT_USER_ERROR;
    }

    return PT_ACCESS_OK;
}

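/* Usage sketch (illustrative; guest_pd, fault_addr, and error_code are
 * hypothetical names for values taken from the fault context):
 *
 *   pt_access_status_t status = can_access_pde32(guest_pd, fault_addr, error_code);
 *   if (status != PT_ACCESS_OK) {
 *       // The guest's own mapping forbids this access, so the fault should
 *       // be reflected back into the guest rather than handled here
 *   }
 */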
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j;
    struct shadow_map * map = &(guest_info->mem_map);

    pde32_t * pde = V3_VAddr(V3_AllocPages(1));

    for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
	int pte_present = 0;
	pte32_t * pte = V3_VAddr(V3_AllocPages(1));

	for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
	    struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

	    if (!region ||
		(region->host_type == HOST_REGION_HOOK) ||
		(region->host_type == HOST_REGION_UNALLOCATED) ||
		(region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
		(region->host_type == HOST_REGION_REMOTE) ||
		(region->host_type == HOST_REGION_SWAPPED)) {
		// Gaps and hooked/unbacked regions are left unmapped so accesses fault
		pte[j].present = 0;
		pte[j].writable = 0;
		pte[j].user_page = 0;
		pte[j].write_through = 0;
		pte[j].cache_disable = 0;
		pte[j].accessed = 0;
		pte[j].dirty = 0;
		pte[j].pte_attr = 0;
		pte[j].global_page = 0;
		pte[j].vmm_info = 0;
		pte[j].page_base_addr = 0;
	    } else {
		addr_t host_addr;
		pte[j].present = 1;
		pte[j].writable = 1;
		pte[j].user_page = 1;
		pte[j].write_through = 0;
		pte[j].cache_disable = 0;
		pte[j].accessed = 0;
		pte[j].dirty = 0;
		pte[j].pte_attr = 0;
		pte[j].global_page = 0;
		pte[j].vmm_info = 0;

		if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
		    // BIG ERROR
		    // PANIC
		    return NULL;
		}

		pte[j].page_base_addr = host_addr >> 12;
		pte_present = 1;
	    }

	    current_page_addr += PAGE_SIZE;
	}

	if (pte_present == 0) {
	    // No page in this 4MB region was mapped, so drop the empty page table
	    V3_FreePage(V3_PAddr(pte));

	    pde[i].present = 0;
	    pde[i].writable = 0;
	    pde[i].user_page = 0;
	    pde[i].write_through = 0;
	    pde[i].cache_disable = 0;
	    pde[i].accessed = 0;
	    pde[i].reserved = 0;
	    pde[i].large_page = 0;
	    pde[i].global_page = 0;
	    pde[i].vmm_info = 0;
	    pde[i].pt_base_addr = 0;
	} else {
	    pde[i].present = 1;
	    pde[i].writable = 1;
	    pde[i].user_page = 1;
	    pde[i].write_through = 0;
	    pde[i].cache_disable = 0;
	    pde[i].accessed = 0;
	    pde[i].reserved = 0;
	    pde[i].large_page = 0;
	    pde[i].global_page = 0;
	    pde[i].vmm_info = 0;
	    pde[i].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
	}
    }

    return pde;
}

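/* The directory returned above is a host virtual pointer; loading it into a
 * hardware paging context requires the physical address, i.e. V3_PAddr(pde). */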
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info) {
    addr_t current_page_addr = 0;
    int i, j, k;
    struct shadow_map * map = &(guest_info->mem_map);

    pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1));
    memset(pdpe, 0, PAGE_SIZE);

    for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) {
	int pde_present = 0;
	pde32pae_t * pde = V3_VAddr(V3_AllocPages(1));
	memset(pde, 0, PAGE_SIZE);

	for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) {
	    int pte_present = 0;
	    pte32pae_t * pte = V3_VAddr(V3_AllocPages(1));
	    memset(pte, 0, PAGE_SIZE);

	    for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) {
		struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

		if (!region ||
		    (region->host_type == HOST_REGION_HOOK) ||
		    (region->host_type == HOST_REGION_UNALLOCATED) ||
		    (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
		    (region->host_type == HOST_REGION_REMOTE) ||
		    (region->host_type == HOST_REGION_SWAPPED)) {
		    pte[k].present = 0;
		    pte[k].writable = 0;
		    pte[k].user_page = 0;
		    pte[k].write_through = 0;
		    pte[k].cache_disable = 0;
		    pte[k].accessed = 0;
		    pte[k].dirty = 0;
		    pte[k].pte_attr = 0;
		    pte[k].global_page = 0;
		    pte[k].vmm_info = 0;
		    pte[k].page_base_addr = 0;
		} else {
		    addr_t host_addr;
		    pte[k].present = 1;
		    pte[k].writable = 1;
		    pte[k].user_page = 1;
		    pte[k].write_through = 0;
		    pte[k].cache_disable = 0;
		    pte[k].accessed = 0;
		    pte[k].dirty = 0;
		    pte[k].pte_attr = 0;
		    pte[k].global_page = 0;
		    pte[k].vmm_info = 0;

		    if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) {
			// BIG ERROR
			// PANIC
			return NULL;
		    }

		    pte[k].page_base_addr = host_addr >> 12;
		    pte_present = 1;
		}

		current_page_addr += PAGE_SIZE;
	    }

	    if (pte_present == 0) {
		V3_FreePage(V3_PAddr(pte));

		pde[j].present = 0;
		pde[j].writable = 0;
		pde[j].user_page = 0;
		pde[j].write_through = 0;
		pde[j].cache_disable = 0;
		pde[j].accessed = 0;
		pde[j].avail = 0;
		pde[j].large_page = 0;
		pde[j].global_page = 0;
		pde[j].vmm_info = 0;
		pde[j].pt_base_addr = 0;
	    } else {
		pde_present = 1;

		pde[j].present = 1;
		pde[j].writable = 1;
		pde[j].user_page = 1;
		pde[j].write_through = 0;
		pde[j].cache_disable = 0;
		pde[j].accessed = 0;
		pde[j].avail = 0;
		pde[j].large_page = 0;
		pde[j].global_page = 0;
		pde[j].vmm_info = 0;
		pde[j].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
	    }
	}

	if (pde_present == 0) {
	    V3_FreePage(V3_PAddr(pde));

	    pdpe[i].present = 0;
	    pdpe[i].write_through = 0;
	    pdpe[i].cache_disable = 0;
	    pdpe[i].accessed = 0;
	    pdpe[i].vmm_info = 0;
	    pdpe[i].pd_base_addr = 0;
	} else {
	    pdpe[i].present = 1;
	    pdpe[i].write_through = 0;
	    pdpe[i].cache_disable = 0;
	    pdpe[i].accessed = 0;
	    pdpe[i].vmm_info = 0;
	    pdpe[i].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
	}
    }

    return pdpe;
}

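/* Note: in PAE mode the top level is a 4-entry PDPT, and PDPT entries have
 * no writable or user bits (those bit positions are reserved in hardware),
 * which is why only the present, caching, and accessed fields are set above;
 * the memset keeps the reserved bits clear. */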
/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
pml4e64_t * create_passthrough_pts_64(struct guest_info * info) {
    addr_t current_page_addr = 0;
    int i, j, k, m;
    struct shadow_map * map = &(info->mem_map);

    pml4e64_t * pml = V3_VAddr(V3_AllocPages(1));

    // Only PML4 entry 0 is filled in (the low 512GB of the address space)
    for (i = 0; i < 1; i++) {
	int pdpe_present = 0;
	pdpe64_t * pdpe = V3_VAddr(V3_AllocPages(1));

	// Each PDPT entry maps 1GB, so this covers the first 20GB of guest memory
	for (j = 0; j < 20; j++) {
	    int pde_present = 0;
	    pde64_t * pde = V3_VAddr(V3_AllocPages(1));

	    for (k = 0; k < MAX_PDE64_ENTRIES; k++) {
		int pte_present = 0;
		pte64_t * pte = V3_VAddr(V3_AllocPages(1));

		for (m = 0; m < MAX_PTE64_ENTRIES; m++) {
		    struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr);

		    if (!region ||
			(region->host_type == HOST_REGION_HOOK) ||
			(region->host_type == HOST_REGION_UNALLOCATED) ||
			(region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) ||
			(region->host_type == HOST_REGION_REMOTE) ||
			(region->host_type == HOST_REGION_SWAPPED)) {
			pte[m].present = 0;
			pte[m].writable = 0;
			pte[m].user_page = 0;
			pte[m].write_through = 0;
			pte[m].cache_disable = 0;
			pte[m].accessed = 0;
			pte[m].dirty = 0;
			pte[m].pte_attr = 0;
			pte[m].global_page = 0;
			pte[m].vmm_info = 0;
			pte[m].page_base_addr = 0;
		    } else {
			addr_t host_addr;
			pte[m].present = 1;
			pte[m].writable = 1;
			pte[m].user_page = 1;
			pte[m].write_through = 0;
			pte[m].cache_disable = 0;
			pte[m].accessed = 0;
			pte[m].dirty = 0;
			pte[m].pte_attr = 0;
			pte[m].global_page = 0;
			pte[m].vmm_info = 0;

			if (guest_pa_to_host_pa(info, current_page_addr, &host_addr) == -1) {
			    // BIG ERROR
			    // PANIC
			    return NULL;
			}

			pte[m].page_base_addr = PTE64_BASE_ADDR(host_addr);

			//PrintPTE64(current_page_addr, &(pte[m]));

			pte_present = 1;
		    }

		    current_page_addr += PAGE_SIZE;
		}

		if (pte_present == 0) {
		    V3_FreePage(V3_PAddr(pte));

		    pde[k].present = 0;
		    pde[k].writable = 0;
		    pde[k].user_page = 0;
		    pde[k].write_through = 0;
		    pde[k].cache_disable = 0;
		    pde[k].accessed = 0;
		    pde[k].large_page = 0;
		    //pde[k].global_page = 0;
		    pde[k].vmm_info = 0;
		    pde[k].pt_base_addr = 0;
		} else {
		    pde_present = 1;

		    pde[k].present = 1;
		    pde[k].writable = 1;
		    pde[k].user_page = 1;
		    pde[k].write_through = 0;
		    pde[k].cache_disable = 0;
		    pde[k].accessed = 0;
		    pde[k].large_page = 0;
		    //pde[k].global_page = 0;
		    pde[k].vmm_info = 0;
		    pde[k].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte));
		}
	    }

	    if (pde_present == 0) {
		V3_FreePage(V3_PAddr(pde));

		pdpe[j].present = 0;
		pdpe[j].writable = 0;
		pdpe[j].user_page = 0;
		pdpe[j].write_through = 0;
		pdpe[j].cache_disable = 0;
		pdpe[j].accessed = 0;
		pdpe[j].reserved = 0;
		pdpe[j].large_page = 0;
		//pdpe[j].global_page = 0;
		pdpe[j].vmm_info = 0;
		pdpe[j].pd_base_addr = 0;
	    } else {
		pdpe_present = 1;

		pdpe[j].present = 1;
		pdpe[j].writable = 1;
		pdpe[j].user_page = 1;
		pdpe[j].write_through = 0;
		pdpe[j].cache_disable = 0;
		pdpe[j].accessed = 0;
		pdpe[j].reserved = 0;
		pdpe[j].large_page = 0;
		//pdpe[j].global_page = 0;
		pdpe[j].vmm_info = 0;
		pdpe[j].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde));
	    }
	}

	PrintDebug("PML index=%d\n", i);

	if (pdpe_present == 0) {
	    V3_FreePage(V3_PAddr(pdpe));

	    pml[i].present = 0;
	    pml[i].writable = 0;
	    pml[i].user_page = 0;
	    pml[i].write_through = 0;
	    pml[i].cache_disable = 0;
	    pml[i].accessed = 0;
	    pml[i].reserved = 0;
	    //pml[i].large_page = 0;
	    //pml[i].global_page = 0;
	    pml[i].vmm_info = 0;
	    pml[i].pdp_base_addr = 0;
	} else {
	    pml[i].present = 1;
	    pml[i].writable = 1;
	    pml[i].user_page = 1;
	    pml[i].write_through = 0;
	    pml[i].cache_disable = 0;
	    pml[i].accessed = 0;
	    pml[i].reserved = 0;
	    //pml[i].large_page = 0;
	    //pml[i].global_page = 0;
	    pml[i].vmm_info = 0;
	    pml[i].pdp_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pdpe));
	}
    }

    return pml;
}

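/* Note: only PML4 entry 0 is populated, and only the first 20 PDPT entries
 * within it, so this passthrough map covers at most the first 20GB of guest
 * physical memory (each PDPT entry maps 1GB). */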