/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
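
/* 32 bit page table handling (activation, page faults, and INVLPG) for the
 * cached shadow paging implementation. Shadow page tables mirror the guest's
 * page tables and are looked up and created through the shadow page cache
 * (find_shdw_pt / create_shdw_pt) so that they can be reused rather than
 * rebuilt on every guest CR3 switch.
 */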

static inline int activate_shadow_pt_32(struct guest_info * core) {
    struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(core->ctrl_regs.cr3);
    struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(core->shdw_pg_state.guest_cr3);
    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_cr3->pdt_base_addr);
    struct shdw_pg_data * shdw_pg = NULL;
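
    /* Look up the shadow page directory mirroring the guest's CR3 target.
     * On a cache miss an empty shadow PD is created; its entries are then
     * filled in lazily by the page fault handlers below.
     */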
    PrintDebug("Activating 32 Bit cacheable page tables\n");
    shdw_pg = find_shdw_pt(core->vm_info, gpa, PAGE_PD32);

    PrintDebug("shdw_pg returned as %p\n", (void *)shdw_pg);

    if (shdw_pg == NULL) {
        shdw_pg = create_shdw_pt(core->vm_info, gpa, PAGE_PD32);
    }

    PrintDebug("shdw_pg now exists...\n");

    shadow_cr3->pdt_base_addr = PAGE_BASE_ADDR_4KB(shdw_pg->hpa);
    shadow_cr3->pwt = guest_cr3->pwt;
    shadow_cr3->pcd = guest_cr3->pcd;

    return 0;
}

/*
 * 32 bit Page table fault handlers
 */

static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                              pt_access_status_t shadow_pde_access, pde32_4MB_t * large_shadow_pde,
                                              pde32_4MB_t * large_guest_pde);

static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                              pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, struct shdw_pg_data * pt_pg_data);

static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                          pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data);
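
/* Top level 32 bit page fault handler. The fault is first classified at the
 * PDE level: if the guest's own page tables would also have faulted
 * (v3_is_guest_pf), the fault belongs to the guest and is re-injected;
 * otherwise the shadow PDE is brought in sync with the guest PDE and the
 * fault is passed down to the PTE level handlers declared above.
 */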
static inline int handle_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
    pde32_t * guest_pd = NULL;
    pde32_t * shadow_pd = CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
    pt_access_status_t guest_pde_access;
    pt_access_status_t shadow_pde_access;
    pde32_t * guest_pde = NULL;
    pde32_t * shadow_pde = (pde32_t *)&(shadow_pd[PDE32_INDEX(fault_addr)]);
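
    /* A 32 bit virtual address decomposes into [31:22] = PDE index,
     * [21:12] = PTE index, and [11:0] = page offset. For example, a
     * fault_addr of 0xBFFFF123 selects PDE 0x2FF, PTE 0x3FF, offset 0x123.
     */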

    PrintDebug("Shadow cache page fault handler: %p\n", (void *)fault_addr);
    PrintDebug("Handling PDE32 Fault\n");

    if (v3_gpa_to_hva(core, guest_cr3, (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3);
        return -1;
    }

    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(fault_addr)]);

    // Check the guest page permissions
    guest_pde_access = v3_can_access_pde32(guest_pd, fault_addr, error_code);

    // Check the shadow page permissions
    shadow_pde_access = v3_can_access_pde32(shadow_pd, fault_addr, error_code);

    /* Was the page fault caused by the Guest's page tables? */
    if (v3_is_guest_pf(guest_pde_access, shadow_pde_access) == 1) {
        PrintDebug("Injecting PDE pf to guest: (guest access error=%d) (shdw access error=%d) (pf error code=%d)\n",
                   *(uint_t *)&guest_pde_access, *(uint_t *)&shadow_pde_access, *(uint_t *)&error_code);
        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }
        return 0;
    }

    if (shadow_pde_access == PT_ACCESS_USER_ERROR) {
        // PDE entry marked non-user
        PrintDebug("Shadow Paging User access error (shadow_pde_access=0x%x, guest_pde_access=0x%x)\n",
                   shadow_pde_access, guest_pde_access);

        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }
        return 0;
    } else if ((shadow_pde_access == PT_ACCESS_WRITE_ERROR) &&
               (guest_pde->large_page == 1)) {
        // Emulate the dirty bit for a large guest page on its first write
        ((pde32_4MB_t *)guest_pde)->dirty = 1;
        shadow_pde->writable = guest_pde->writable;
        return 0;
    } else if ((shadow_pde_access != PT_ACCESS_NOT_PRESENT) &&
               (shadow_pde_access != PT_ACCESS_OK)) {
        // inject page fault into the guest
        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }
        PrintDebug("Unknown error occurred (shadow_pde_access=%d)\n", shadow_pde_access);
        PrintDebug("Manual says to inject page fault into guest\n");
        return 0;
    }

    pte32_t * shadow_pt = NULL;
    pte32_t * guest_pt = NULL;

    /* Set up cache state */
    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_pde->pt_base_addr);

    struct shdw_pg_data * shdw_page = NULL;
    page_type_t pt_type = PAGE_PT32;
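
    /* Shadow pages are cached keyed on (guest physical address, page type),
     * so a guest page table reachable from multiple address spaces resolves
     * to a single cached shadow copy.
     */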

    if (guest_pde->large_page == 1) {
        // Handle large pages, for this we use the PAGE_4MB type...
        gpa = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
        pt_type = PAGE_4MB;
    }

    shdw_page = find_shdw_pt(core->vm_info, gpa, pt_type);

    if (shdw_page == NULL) {
        shdw_page = create_shdw_pt(core->vm_info, gpa, pt_type);

        // update current reverse map entries...
        // We are now using this page in a PT, so:
        // any existing writable mappings must be updated
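        // (A shadow PTE that still maps this page writable would let the
        // guest silently modify a page table we are now caching, so those
        // mappings are downgraded before the page is used as a PT.)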
        update_rmap_entries(core->vm_info, gpa);
    }

    struct shdw_pg_data * parent_page = find_shdw_pt(core->vm_info, guest_cr3, PAGE_PD32);

    if (parent_page != NULL) {
        // add a back pointer to the PDE, if it exists
        link_shdw_pg(shdw_page, parent_page, PAGE_ADDR_4KB(fault_addr));
    }

    // Get the next shadow page level, allocate if not present

    if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {

        /* Currently we do not support large pages.
         * This requires us to scan the large page for page table pages, and
         * split the entries if they exist. It's easier to just ignore this
         * for now...
         */
        if ((core->use_large_pages == 1) && (guest_pde->large_page == 1)) {
            // Check the underlying physical memory map to see if a large page is viable
            addr_t gpa_4MB = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
            uint32_t page_size = v3_get_max_page_size(core, gpa_4MB, PROTECTED);

            if (page_size == PAGE_SIZE_4MB) {
                PrintDebug("using large page for fault_addr %p (gpa=%p)\n", (void *)fault_addr, (void *)gpa_4MB);
                if (handle_4MB_shadow_pagefault_pde_32(core, fault_addr, error_code, shadow_pde_access,
                                                       (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == -1) {
                    PrintError("Error handling large pagefault with large page\n");
                    return -1;
                }

                return 0;
            }
        }

        shadow_pt = (pte32_t *)(shdw_page->hva);

        shadow_pde->present = 1;
        shadow_pde->user_page = guest_pde->user_page;

        if (guest_pde->large_page == 0) {
            shadow_pde->writable = guest_pde->writable;
        } else {
            // This large page flag is temporary until we can get a working cache....
            ((pde32_4MB_t *)guest_pde)->vmm_info = V3_LARGE_PG;

            if (error_code.write) {
                shadow_pde->writable = guest_pde->writable;
                ((pde32_4MB_t *)guest_pde)->dirty = 1;
            } else {
                shadow_pde->writable = 0;
                ((pde32_4MB_t *)guest_pde)->dirty = 0;
            }
        }

        // VMM specific options
        shadow_pde->write_through = guest_pde->write_through;
        shadow_pde->cache_disable = guest_pde->cache_disable;
        shadow_pde->global_page = guest_pde->global_page;

        guest_pde->accessed = 1;

        shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->hpa);
    } else {
        shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr));
    }

    if (guest_pde->large_page == 0) {
        if (v3_gpa_to_hva(core, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t *)&guest_pt) == -1) {
            // Machine check the guest
            PrintDebug("Invalid Guest PTE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr));
            v3_raise_exception(core, MC_EXCEPTION);
            return 0;
        }

        if (handle_pte_shadow_pagefault_32(core, fault_addr, error_code, shadow_pt, guest_pt, shdw_page) == -1) {
            PrintError("Error handling page fault caused by PTE\n");
            return -1;
        }
    } else {
        if (handle_4MB_shadow_pagefault_pte_32(core, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde, shdw_page) == -1) {
            PrintError("Error handling large pagefault\n");
            return -1;
        }
    }

    return 0;
}
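
/* PTE level fault handler for 4KB guest pages. Brings the shadow PTE in sync
 * with the guest PTE, emulates the guest accessed/dirty bits, and
 * write-protects any page that is itself a cached page table (tagging it
 * V3_CACHED_PG so that later writes trigger a cache eviction).
 */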
static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                          pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data) {
    pt_access_status_t guest_pte_access;
    pt_access_status_t shadow_pte_access;
    pte32_t * guest_pte = (pte32_t *)&(guest_pt[PTE32_INDEX(fault_addr)]);
    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
    addr_t guest_pa = BASE_TO_PAGE_ADDR((addr_t)(guest_pte->page_base_addr)) + PAGE_OFFSET(fault_addr);

    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_pa);

    if (shdw_reg == NULL) {
        // Inject a machine check into the guest
        PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_pa);
        v3_raise_exception(core, MC_EXCEPTION);
        return 0;
    }

    // Check the guest page permissions
    guest_pte_access = v3_can_access_pte32(guest_pt, fault_addr, error_code);

    // Check the shadow page permissions
    shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);

    /* Was the page fault caused by the Guest's page tables? */
    if (v3_is_guest_pf(guest_pte_access, shadow_pte_access) == 1) {
        PrintDebug("Access error injecting pf to guest (guest access error=%d) (pf error code=%d)\n",
                   guest_pte_access, *(uint_t *)&error_code);

        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_OK) {
        // Inconsistent state...
        // Guest re-entry will flush the page tables and everything should now work
        PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
        // Page table entry not present
        PrintDebug("guest_pa = %p\n", (void *)guest_pa);

        if ((shdw_reg->flags.alloced == 1) && (shdw_reg->flags.read == 1)) {
            addr_t shadow_pa = 0;

            if (v3_gpa_to_hpa(core, guest_pa, &shadow_pa) == -1) {
                PrintError("could not translate page fault address (%p)\n", (void *)guest_pa);
                return -1;
            }

            shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);

            PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

            shadow_pte->present = guest_pte->present;
            shadow_pte->user_page = guest_pte->user_page;

            // set according to VMM policy
            shadow_pte->write_through = guest_pte->write_through;
            shadow_pte->cache_disable = guest_pte->cache_disable;
            shadow_pte->global_page = guest_pte->global_page;

            guest_pte->accessed = 1;
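
            /* Dirty bit emulation: a clean guest PTE is mapped read-only
             * even if the guest marked it writable, so the first write traps
             * here and the guest dirty bit can be set before write access is
             * granted.
             */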
            if (guest_pte->dirty == 1) {
                shadow_pte->writable = guest_pte->writable;
            } else if ((guest_pte->dirty == 0) && (error_code.write == 1)) {
                shadow_pte->writable = guest_pte->writable;
                guest_pte->dirty = 1;
            } else if ((guest_pte->dirty == 0) && (error_code.write == 0)) {
                shadow_pte->writable = 0;
            }

            if (shdw_reg->flags.write == 0) {
                shadow_pte->writable = 0;
            }

            // Add this PTE to the reverse map...
            // This allows us to update this PTE entry if it gets turned into a PT page
            add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_pa), PAGE_ADDR_4KB(fault_addr));

            // Check for cache entries and mark the page read-only, plus tag it
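            // If the target page is itself a cached page table (either a PT
            // or a PD), it must stay read-only so that guest writes to it
            // trap and the stale cached shadow copy can be evicted below.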

            struct shdw_pg_data * pt_page = NULL;
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);

            pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);

            if (pt_page == NULL) {
                pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
            }

            if (pt_page != NULL) {
                // This is a page table page...
                shadow_pte->writable = 0;
                shadow_pte->vmm_info = V3_CACHED_PG;
            }

        } else {
            // Page fault on an unhandled memory region

            if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }
    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
        guest_pte->dirty = 1;

        // Check for a cache tag and handle invalidations if it exists
        if (shadow_pte->vmm_info == V3_CACHED_PG) {
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);

            PrintDebug("Evicting on a small page\n");

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
                PrintError("Error evicting PAGE_PD32 cache entry\n");
                return -1;
            }

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
                PrintError("Error evicting PAGE_PT32 cache entry\n");
                return -1;
            }

            shadow_pte->vmm_info &= ~V3_CACHED_PG;
        }

        if (shdw_reg->flags.write == 1) {
            PrintDebug("Shadow PTE Write Error\n");
            shadow_pte->writable = guest_pte->writable;
        } else {
            if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }

        return 0;
    } else {
        // Inject the page fault into the guest
        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        PrintError("PTE page fault fell through... Not sure if this should ever happen\n");
        PrintError("Manual says to inject page fault into guest\n");
        return -1;
    }

    return 0;
}
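
/* A guest 4MB page is backed here by a shadow page *table* of 4KB entries,
 * since the underlying host physical memory may not be contiguous or may
 * span regions with different access flags. Permissions are taken from the
 * large guest PDE; region flags and cache tags are applied per 4KB entry.
 */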
// Handle a 4MB page fault with small pages in the PTE
static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core,
                                              addr_t fault_addr, pf_error_t error_code,
                                              pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde,
                                              struct shdw_pg_data * pt_pg_data)
{
    pt_access_status_t shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
    addr_t guest_fault_pa = BASE_TO_PAGE_ADDR_4MB(large_guest_pde->page_base_addr) + PAGE_OFFSET_4MB(fault_addr);

    PrintDebug("Handling 4MB fault (guest_fault_pa=%p) (error_code=%x)\n", (void *)guest_fault_pa, *(uint_t *)&error_code);
    PrintDebug("ShadowPT=%p, LargeGuestPDE=%p\n", (void *)shadow_pt, (void *)large_guest_pde);

    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_fault_pa);

    if (shdw_reg == NULL) {
        // Inject a machine check into the guest
        PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa);
        v3_raise_exception(core, MC_EXCEPTION);
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_OK) {
        // Inconsistent state...
        // Guest re-entry will flush the tables and everything should now work
        PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
        // Map in the guest physical address of the fault

        if ((shdw_reg->flags.alloced == 1) &&
            (shdw_reg->flags.read == 1)) {
            addr_t shadow_pa = 0;

            if (v3_gpa_to_hpa(core, guest_fault_pa, &shadow_pa) == -1) {
                PrintError("could not translate page fault address (%p)\n", (void *)guest_fault_pa);
                return -1;
            }

            shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);

            PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

            shadow_pte->present = 1;

            /* We are assuming that the PDE entry has precedence,
             * so the shadow PDE will mirror the guest PDE settings,
             * and we don't have to worry about them here.
             */
            shadow_pte->user_page = 1;

            // set according to VMM policy
            shadow_pte->write_through = large_guest_pde->write_through;
            shadow_pte->cache_disable = large_guest_pde->cache_disable;
            shadow_pte->global_page = large_guest_pde->global_page;

            if (shdw_reg->flags.write == 0) {
                shadow_pte->writable = 0;
            } else {
                shadow_pte->writable = 1;
            }

            // Add this PTE to the reverse map...
            // This allows us to update this PTE entry if it gets turned into a PT page sometime in the future
            add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_fault_pa), PAGE_ADDR_4KB(fault_addr));

            // Check for cache entries and mark the page read-only, plus tag it

            struct shdw_pg_data * pt_page = NULL;
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);

            pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);

            if (pt_page == NULL) {
                pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
            }

            if (pt_page != NULL) {
                // This is a page table page...
                shadow_pte->writable = 0;
                shadow_pte->vmm_info = V3_CACHED_PG;
            }

        } else {
            if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }
    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {

        // Check for a cache tag and handle invalidations if it exists
        if (shadow_pte->vmm_info == V3_CACHED_PG) {
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);

            PrintDebug("Evicting on a large page\n");

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
                PrintError("Error evicting PAGE_PD32 cache entry\n");
                return -1;
            }

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
                PrintError("Error evicting PAGE_PT32 cache entry\n");
                return -1;
            }

            shadow_pte->vmm_info &= ~V3_CACHED_PG;
        }

        if (shdw_reg->flags.write == 0) {
            if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        } else {
            // set writable after the cache eviction, unless overruled by the region setting
            shadow_pte->writable = 1;
        }

    } else {
        PrintError("Error in large page fault handler...\n");
        PrintError("This case should have been handled at the top level handler\n");
        return -1;
    }

    PrintDebug("Returning from large page->small page fault handler\n");
    return 0;
}

/* If we start to optimize, we should look up the guest pages in the cache... */
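/* INVLPG emulation: rather than walking the guest page tables, the shadow
 * entry covering vaddr is simply marked not-present, which forces the next
 * access to re-fault and rebuild it from the (possibly changed) guest entry.
 */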
static inline int handle_shadow_invlpg_32(struct guest_info * core, addr_t vaddr) {
    pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
    pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(vaddr)];
    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
    pde32_t * guest_pd = NULL;
    pde32_t * guest_pde = NULL;

    if (v3_gpa_to_hva(core, guest_cr3, (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3);
        return -1;
    }

    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(vaddr)]);

    if (guest_pde->large_page == 1) {
        shadow_pde->present = 0;
        PrintDebug("\tInvalidating Large Page (gpa=%p)\n", (void *)BASE_TO_PAGE_ADDR_4MB(guest_pde->pt_base_addr));
    } else if (shadow_pde->present == 1) {
        pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR_4KB(shadow_pde->pt_base_addr);
        pte32_t * shadow_pte = (pte32_t *)V3_VAddr((void *)&shadow_pt[PTE32_INDEX(vaddr)]);

        PrintDebug("\tInvalidating small page\n");

        shadow_pte->present = 0;
    } else {
        PrintError("Unexpected INVLPG state: shadow PDE not present\n");
    }

    return 0;
}