 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
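
/* Install the cached 32-bit shadow page directory for this core:
 * look up (or create) the shadow PD for the guest's CR3 in the VM-wide
 * shadow page cache, then point the host CR3 at its host physical copy. */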
static inline int activate_shadow_pt_32(struct guest_info * core) {
    struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(core->ctrl_regs.cr3);
    struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(core->shdw_pg_state.guest_cr3);
    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_cr3->pdt_base_addr);
    struct shdw_pg_data * shdw_pg = NULL;

    PrintDebug("Activating 32 Bit cacheable page tables\n");

    shdw_pg = find_shdw_pt(core->vm_info, gpa, PAGE_PD32);

    PrintError("shdw_pg returned as %p for CR3:%p\n", shdw_pg, (void *)gpa);

    if (shdw_pg == NULL) {
        shdw_pg = create_shdw_pt(core->vm_info, gpa, PAGE_PD32);

        // update current reverse map entries...
        // We are now using this page in a PT, so:
        //   any existing writable mappings must be updated
        update_rmap_entries(core->vm_info, gpa);
    }

    PrintDebug("shdw_pg now exists...\n");

    shadow_cr3->pdt_base_addr = PAGE_BASE_ADDR_4KB(shdw_pg->hpa);
    shadow_cr3->pwt = guest_cr3->pwt;
    shadow_cr3->pcd = guest_cr3->pcd;

    return 0;
}


/*
 * 32 bit Page table fault handlers
 */

static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                              pt_access_status_t shadow_pde_access, pde32_4MB_t * large_shadow_pde,
                                              pde32_4MB_t * large_guest_pde);

static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                              pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, struct shdw_pg_data * pt_pg_data);

static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                          pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data);
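
/* Illustrative sketch only: these handlers are assumed to be selected by the
 * generic shadow paging core based on the guest's paging mode, roughly:
 *
 *     switch (v3_get_vm_cpu_mode(core)) {
 *         case PROTECTED:
 *             return handle_shadow_pagefault_32(core, fault_addr, error_code);
 *         // ... PAE / long-mode guests use their own handlers ...
 *     }
 *
 * The actual dispatch lives in the shadow paging core, not in this file. */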
static inline int handle_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
    pde32_t * guest_pd = NULL;
    pde32_t * shadow_pd = CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
    pt_access_status_t guest_pde_access;
    pt_access_status_t shadow_pde_access;
    pde32_t * guest_pde = NULL;
    pde32_t * shadow_pde = (pde32_t *)&(shadow_pd[PDE32_INDEX(fault_addr)]);

    PrintDebug("Shadow cache page fault handler: %p\n", (void *)fault_addr);
    PrintDebug("Handling PDE32 Fault\n");

    if (v3_gpa_to_hva(core, guest_cr3, (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3);
        return -1;
    }

    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(fault_addr)]);

    // Check the guest page permissions
    guest_pde_access = v3_can_access_pde32(guest_pd, fault_addr, error_code);

    // Check the shadow page permissions
    shadow_pde_access = v3_can_access_pde32(shadow_pd, fault_addr, error_code);

    /* Was the page fault caused by the Guest's page tables? */
    if (v3_is_guest_pf(guest_pde_access, shadow_pde_access) == 1) {
        PrintDebug("Injecting PDE pf to guest: (guest access error=%d) (shdw access error=%d) (pf error code=%d)\n",
                   *(uint_t *)&guest_pde_access, *(uint_t *)&shadow_pde_access, *(uint_t *)&error_code);

        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        return 0;
    }

    if (shadow_pde_access == PT_ACCESS_USER_ERROR) {
        //
        // PDE entry marked non-user
        //
        PrintDebug("Shadow Paging User access error (shadow_pde_access=0x%x, guest_pde_access=0x%x)\n",
                   shadow_pde_access, guest_pde_access);

        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        return 0;
    } else if ((shadow_pde_access == PT_ACCESS_WRITE_ERROR) &&
               (guest_pde->large_page == 1)) {

        ((pde32_4MB_t *)guest_pde)->dirty = 1;
        shadow_pde->writable = guest_pde->writable;

        return 0;
    } else if ((shadow_pde_access != PT_ACCESS_NOT_PRESENT) &&
               (shadow_pde_access != PT_ACCESS_OK)) {
        // inject a page fault into the guest
        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        PrintDebug("Unknown Error occurred (shadow_pde_access=%d)\n", shadow_pde_access);
        PrintDebug("Manual Says to inject page fault into guest\n");

        return 0;
    }

    pte32_t * shadow_pt = NULL;
    pte32_t * guest_pt = NULL;

    /* Set up cache state */
    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_pde->pt_base_addr);

    struct shdw_pg_data * shdw_page = NULL;
    page_type_t pt_type = PAGE_PT32;
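
    /* Guest page tables are tracked in a VM-wide shadow page cache keyed by
     * guest physical address and page type (PAGE_PD32 / PAGE_PT32 / PAGE_4MB),
     * so a guest page table that is already cached can be found and reused. */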

    if (guest_pde->large_page == 1) {
        // Handle large pages; for these we use the PAGE_4MB type...
        gpa = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
        pt_type = PAGE_4MB;
    }

    shdw_page = find_shdw_pt(core->vm_info, gpa, pt_type);

    if (shdw_page == NULL) {
        shdw_page = create_shdw_pt(core->vm_info, gpa, pt_type);

        if (pt_type == PAGE_PT32) {
            // update current reverse map entries...
            // We are now using this page in a PT, so:
            //   any existing writable mappings must be updated
            update_rmap_entries(core->vm_info, gpa);
        }
    }

    struct shdw_pg_data * parent_page = find_shdw_pt(core->vm_info, guest_cr3, PAGE_PD32);

    if (parent_page != NULL) {
        // add a back pointer to the PDE, if it exists
        link_shdw_pg(shdw_page, parent_page, PAGE_ADDR_4KB(fault_addr));
    }

    // Get the next shadow page level, allocate if not present
    if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {

        /* Currently we do not support large pages.
           This requires us to scan the large page for page table pages, and split the entries if they exist.
           It's easier to just ignore this for now...
        */
        if ((core->use_large_pages == 1) && (guest_pde->large_page == 1)) {
            // Check the underlying physical memory map to see if a large page is viable
            addr_t gpa_4MB = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
            uint32_t page_size = v3_get_max_page_size(core, gpa_4MB, PROTECTED);

            if (page_size == PAGE_SIZE_4MB) {
                PrintDebug("using large page for fault_addr %p (gpa=%p)\n", (void *)fault_addr, (void *)gpa_4MB);
                if (handle_4MB_shadow_pagefault_pde_32(core, fault_addr, error_code, shadow_pde_access,
                                                       (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == -1) {
                    PrintError("Error handling large pagefault with large page\n");
                    return -1;
                }

                return 0;
            }
        }

        shadow_pt = (pte32_t *)(shdw_page->hva);

        shadow_pde->present = 1;
        shadow_pde->user_page = guest_pde->user_page;

        if (guest_pde->large_page == 0) {
            shadow_pde->writable = guest_pde->writable;
        } else {
            // This large page flag is temporary until we can get a working cache....
            ((pde32_4MB_t *)guest_pde)->vmm_info = V3_LARGE_PG;

            if (error_code.write) {
                shadow_pde->writable = guest_pde->writable;
                ((pde32_4MB_t *)guest_pde)->dirty = 1;
            } else {
                shadow_pde->writable = 0;
                ((pde32_4MB_t *)guest_pde)->dirty = 0;
            }
        }

        // VMM Specific options
        shadow_pde->write_through = guest_pde->write_through;
        shadow_pde->cache_disable = guest_pde->cache_disable;
        shadow_pde->global_page = guest_pde->global_page;

        guest_pde->accessed = 1;

        shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->hpa);
    } else {
        shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr));
    }

    if (guest_pde->large_page == 0) {
        if (v3_gpa_to_hva(core, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t *)&guest_pt) == -1) {
            // Machine check the guest
            PrintDebug("Invalid Guest PTE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr));
            v3_raise_exception(core, MC_EXCEPTION);
            return 0;
        }

        if (handle_pte_shadow_pagefault_32(core, fault_addr, error_code, shadow_pt, guest_pt, shdw_page) == -1) {
            PrintError("Error handling Page fault caused by PTE\n");
            return -1;
        }
    } else {
        if (handle_4MB_shadow_pagefault_pte_32(core, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde, shdw_page) == -1) {
            PrintError("Error handling large pagefault\n");
            return -1;
        }
    }

    return 0;
}

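
/* Handle a fault within a normal (4KB-page) guest page table: propagate the
 * guest PTE into the shadow PTE, emulate the accessed/dirty bit updates, and
 * maintain the reverse map and cache tags for guest page table pages. */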
static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
                                          pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data) {
    pt_access_status_t guest_pte_access;
    pt_access_status_t shadow_pte_access;
    pte32_t * guest_pte = (pte32_t *)&(guest_pt[PTE32_INDEX(fault_addr)]);
    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
    addr_t guest_pa = BASE_TO_PAGE_ADDR((addr_t)(guest_pte->page_base_addr)) + PAGE_OFFSET(fault_addr);

    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->vcpu_id, guest_pa);

    if (shdw_reg == NULL) {
        // Inject a machine check in the guest
        PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_pa);
        v3_raise_exception(core, MC_EXCEPTION);
        return 0;
    }

    // Check the guest page permissions
    guest_pte_access = v3_can_access_pte32(guest_pt, fault_addr, error_code);

    // Check the shadow page permissions
    shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);

    /* Was the page fault caused by the Guest's page tables? */
    if (v3_is_guest_pf(guest_pte_access, shadow_pte_access) == 1) {

        PrintDebug("Access error injecting pf to guest (guest access error=%d) (pf error code=%d)\n",
                   guest_pte_access, *(uint_t *)&error_code);

        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_OK) {
        // Inconsistent state...
        // Guest re-entry will flush the page tables and everything should now work
        PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
        // Page Table Entry Not Present
        PrintDebug("guest_pa = %p\n", (void *)guest_pa);

        if ((shdw_reg->flags.alloced == 1) && (shdw_reg->flags.read == 1)) {
            addr_t shadow_pa = 0;

            if (v3_gpa_to_hpa(core, guest_pa, &shadow_pa) == -1) {
                PrintError("could not translate page fault address (%p)\n", (void *)guest_pa);
                return -1;
            }

            shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);

            PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

            shadow_pte->present = guest_pte->present;
            shadow_pte->user_page = guest_pte->user_page;

            // set according to VMM policy
            shadow_pte->write_through = guest_pte->write_through;
            shadow_pte->cache_disable = guest_pte->cache_disable;
            shadow_pte->global_page = guest_pte->global_page;

            guest_pte->accessed = 1;

            if (guest_pte->dirty == 1) {
                shadow_pte->writable = guest_pte->writable;
            } else if ((guest_pte->dirty == 0) && (error_code.write == 1)) {
                shadow_pte->writable = guest_pte->writable;
                guest_pte->dirty = 1;
            } else if ((guest_pte->dirty == 0) && (error_code.write == 0)) {
                shadow_pte->writable = 0;
            }

            if (shdw_reg->flags.write == 0) {
                shadow_pte->writable = 0;
            }

            // Add this PTE to the reverse map...
            // This allows us to update this PTE entry if it gets turned into a PT page
            add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_pa), PAGE_ADDR_4KB(fault_addr));

            // Check for cache entries and mark the page read-only, plus tag it
            {
                struct shdw_pg_data * pt_page = NULL;
                addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);

                pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);

                if (pt_page == NULL) {
                    pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
                }

                if (pt_page != NULL) {
                    // This is a page table page...
                    PrintError("Found PT page (small), marking RD-ONLY (va=%p), (gpa=%p)\n",
                               (void *)fault_addr, (void *)pg_gpa);
                    shadow_pte->writable = 0;
                    shadow_pte->vmm_info = V3_CACHED_PG;
                }
            }

        } else {
            // Page fault on an unhandled memory region

            if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }
    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
        guest_pte->dirty = 1;

        // check for a cache tag and handle invalidations if it exists
        if (shadow_pte->vmm_info == V3_CACHED_PG) {
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);

            PrintError("Evicting on a small page\n");

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
                PrintError("Error Evicting PAGE_PD32 cache entry\n");
                return -1;
            }

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
                PrintError("Error Evicting PAGE_PT32 cache entry\n");
                return -1;
            }

            shadow_pte->vmm_info &= ~V3_CACHED_PG;
        }

        if (shdw_reg->flags.write == 1) {
            PrintDebug("Shadow PTE Write Error\n");
            shadow_pte->writable = guest_pte->writable;
        } else {
            if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }

        return 0;

    } else {
        // Inject a page fault into the guest
        if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
            PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
            return -1;
        }

        PrintError("PTE Page fault fell through... Not sure if this should ever happen\n");
        PrintError("Manual Says to inject page fault into guest\n");
        return -1;
    }

    return 0;
}

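
/* A guest 4MB mapping is backed here with 4KB shadow PTEs: only the single
 * 4KB shadow page covering fault_addr is filled in, and the guest PDE's
 * permission and attribute bits are mirrored into that PTE. */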
// Handle a 4MB page fault with small pages in the PTE
static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core,
                                              addr_t fault_addr, pf_error_t error_code,
                                              pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde,
                                              struct shdw_pg_data * pt_pg_data)
{
    pt_access_status_t shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
    addr_t guest_fault_pa = BASE_TO_PAGE_ADDR_4MB(large_guest_pde->page_base_addr) + PAGE_OFFSET_4MB(fault_addr);

    PrintDebug("Handling 4MB fault (guest_fault_pa=%p) (error_code=%x)\n", (void *)guest_fault_pa, *(uint_t *)&error_code);
    PrintDebug("ShadowPT=%p, LargeGuestPDE=%p\n", shadow_pt, large_guest_pde);

    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->vcpu_id, guest_fault_pa);

    if (shdw_reg == NULL) {
        // Inject a machine check in the guest
        PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa);
        v3_raise_exception(core, MC_EXCEPTION);
        return -1;
    }

    if (shadow_pte_access == PT_ACCESS_OK) {
        // Inconsistent state...
        // Guest re-entry will flush the tables and everything should now work
        PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
        return 0;
    }

    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
        // Get the guest physical address of the fault

        if ((shdw_reg->flags.alloced == 1) &&
            (shdw_reg->flags.read == 1)) {
            addr_t shadow_pa = 0;

            if (v3_gpa_to_hpa(core, guest_fault_pa, &shadow_pa) == -1) {
                PrintError("could not translate page fault address (%p)\n", (void *)guest_fault_pa);
                return -1;
            }

            shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);

            PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

            shadow_pte->present = 1;

            /* We are assuming that the PDE entry has precedence,
             * so the shadow PDE will mirror the guest PDE settings,
             * and we don't have to worry about them here.
             */
            shadow_pte->user_page = 1;

            // set according to VMM policy
            shadow_pte->write_through = large_guest_pde->write_through;
            shadow_pte->cache_disable = large_guest_pde->cache_disable;
            shadow_pte->global_page = large_guest_pde->global_page;

            if (shdw_reg->flags.write == 0) {
                shadow_pte->writable = 0;
            } else {
                shadow_pte->writable = 1;
            }

            // Add this PTE to the reverse map...
            // This allows us to update this PTE entry if it gets turned into a PT page sometime in the future
            add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_fault_pa), PAGE_ADDR_4KB(fault_addr));

            // Check for cache entries and mark the page read-only, plus tag it
            {
                struct shdw_pg_data * pt_page = NULL;
                addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);

                pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);

                if (pt_page == NULL) {
                    pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
                }

                if (pt_page != NULL) {
                    // This is a page table page...
                    PrintError("Found PT page (large), marking RD-ONLY (va=%p), (gpa=%p)\n",
                               (void *)fault_addr, (void *)pg_gpa);

                    shadow_pte->writable = 0;
                    shadow_pte->vmm_info = V3_CACHED_PG;
                }
            }

        } else {
            if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        }
    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {

        // check for a cache tag and handle invalidations if it exists
        if (shadow_pte->vmm_info == V3_CACHED_PG) {
            addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);

            PrintError("Evicting on a large page\n");

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
                PrintError("Error Evicting PAGE_PD32 cache entry\n");
                return -1;
            }

            if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
                PrintError("Error Evicting PAGE_PT32 cache entry\n");
                return -1;
            }

            shadow_pte->vmm_info &= ~V3_CACHED_PG;
        }

        if (shdw_reg->flags.write == 0) {
            if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
                PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
                return -1;
            }
        } else {
            // set writable after the cache eviction, unless overruled by the region setting
            shadow_pte->writable = 1;
        }

    } else {
        PrintError("Error in large page fault handler...\n");
        PrintError("This case should have been handled at the top level handler\n");
        return -1;
    }

    PrintDebug("Returning from large page->small page fault handler\n");

    return 0;
}

/* If we start to optimize, we should look up the guest pages in the cache... */
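/* Emulate INVLPG for a cached 32-bit shadow: clear the shadow PDE for large
 * pages, or the single shadow PTE covering vaddr for 4KB pages, so the next
 * access refaults and repopulates the entry. */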
static inline int handle_shadow_invlpg_32(struct guest_info * core, addr_t vaddr) {
    pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
    pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(vaddr)];

    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
    pde32_t * guest_pd = NULL;
    pde32_t * guest_pde = NULL;

    if (v3_gpa_to_hva(core, guest_cr3, (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3);
        return -1;
    }

    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(vaddr)]);

    // Should we back-propagate the invalidations, because they might be cached...??

    if (guest_pde->large_page == 1) {
        shadow_pde->present = 0;
        PrintError("\tInvalidating Large Page (gpa=%p)\n", (void *)BASE_TO_PAGE_ADDR_4MB(guest_pde->pt_base_addr));
    } else if (shadow_pde->present == 1) {
        pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR_4KB(shadow_pde->pt_base_addr);
        pte32_t * shadow_pte = (pte32_t *)V3_VAddr((void *)&shadow_pt[PTE32_INDEX(vaddr)]);

        // PrintError("\tInvalidating small page\n");

        shadow_pte->present = 0;
    } else {
        PrintError("Unexpected INVLPG state: shadow PDE not present for a non-large guest page\n");