From: Jack Lange Date: Wed, 20 Oct 2010 19:23:20 +0000 (-0500) Subject: initial shadow page cache version X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=c8a303c8512c0dafcb8dd5f83e6682729165d547 initial shadow page cache version --- diff --git a/Kconfig b/Kconfig index c4aacfb..9d3cbb9 100644 --- a/Kconfig +++ b/Kconfig @@ -213,6 +213,14 @@ config DEBUG_SHDW_PG_VTLB help Enables debugging messages for VTLB implementation +config SHADOW_PAGING_CACHE1 + bool "Shadow Page Cache (1)" + default y + depends on SHADOW_PAGING + help + Enables caching implementation for shadow paging + + endmenu diff --git a/palacios/src/palacios/mmu/Makefile b/palacios/src/palacios/mmu/Makefile index 5d92236..0089d17 100644 --- a/palacios/src/palacios/mmu/Makefile +++ b/palacios/src/palacios/mmu/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_SHADOW_PAGING_VTLB) += vmm_shdw_pg_tlb.o obj-$(CONFIG_SWAPBYPASS) += vmm_shdw_pg_swapbypass.o +obj-$(CONFIG_SHADOW_PAGING_CACHE1) += vmm_shdw_pg_cache.o diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c b/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c new file mode 100644 index 0000000..3f1f0ca --- /dev/null +++ b/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c @@ -0,0 +1,559 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define DEFAULT_CACHE_SIZE ((32 * 1024 * 1024) / 4096) + +#define V3_CACHED_PG 0x1 + +#ifndef CONFIG_DEBUG_SHDW_PG_CACHE +#undef PrintDebug +#define PrintDebug(fmt, ...)
+#endif + + +struct shdw_back_ptr { + addr_t gva; + struct shdw_pg_data * pg_data; + struct list_head back_ptr_node; +}; + +struct guest_pg_tuple { + addr_t gpa; + page_type_t pt_type; +} __attribute__((packed)); + + + +struct rmap_entry { + addr_t gva; + addr_t gpa; + page_type_t pt_type; + struct list_head rmap_node; +}; + +struct shdw_pg_data { + struct guest_pg_tuple tuple; + + addr_t hpa; + void * hva; + + struct list_head back_ptrs; + struct list_head pg_queue_node; + +}; + + + +struct cache_core_state { + + +}; + + +struct cache_vm_state { + + v3_lock_t cache_lock; + + struct hashtable * page_htable; // GPA to shdw_pg_data + struct hashtable * reverse_map; + + + int max_cache_pgs; + int pgs_in_cache; + + struct list_head pg_queue; + + int pgs_in_free_list; + struct list_head free_list; +}; + + + +static inline int evict_pt(void * pt, addr_t va, page_type_t pt_type) { + + switch (pt_type) { + case PAGE_PD32: { + pde32_t * pde = pt; + pde[PDE32_INDEX(va)].writable = 1; + break; + } + case PAGE_4MB: { + pde32_4MB_t * pde = pt; + pde[PDE32_INDEX(va)].writable = 1; + break; + } + case PAGE_PT32: { + pte32_t * pte = pt; + pte[PTE32_INDEX(va)].writable = 1; + break; + } + case PAGE_PML464: { + pml4e64_t * pml = pt; + pml[PML4E64_INDEX(va)].writable = 1; + break; + } + case PAGE_PDP64: { + pdpe64_t * pdp = pt; + pdp[PDPE64_INDEX(va)].writable = 1; + break; + } + case PAGE_PD64: { + pde64_t * pde = pt; + pde[PDE64_INDEX(va)].writable = 1; + break; + } + case PAGE_PT64: { + pte64_t * pte = pt; + pte[PTE64_INDEX(va)].writable = 1; + break; + } + default: + PrintError("Invalid page type: %d\n", pt_type); + return -1; + } + + return 0; +} + + + +static inline int grab_pt(void * pt, addr_t va, page_type_t pt_type) { + + switch (pt_type) { + case PAGE_PD32: { + pde32_t * pde = pt; + pde[PDE32_INDEX(va)].writable = 0; + break; + } + case PAGE_4MB: { + pde32_4MB_t * pde = pt; + pde[PDE32_INDEX(va)].writable = 0; + break; + } + case PAGE_PT32: { + pte32_t * pte = pt; + pte[PTE32_INDEX(va)].writable = 0; + break; + } + case PAGE_PML464: { + pml4e64_t * pml = pt; + pml[PML4E64_INDEX(va)].writable = 0; + break; + } + case PAGE_PDP64: { + pdpe64_t * pdp = pt; + pdp[PDPE64_INDEX(va)].writable = 0; + break; + } + case PAGE_PD64: { + pde64_t * pde = pt; + pde[PDE64_INDEX(va)].writable = 0; + break; + } + case PAGE_PT64: { + pte64_t * pte = pt; + pte[PTE64_INDEX(va)].writable = 0; + break; + } + default: + PrintError("Invalid page type: %d\n", pt_type); + return -1; + } + + return 0; +} + + +static int unlink_shdw_pg(struct shdw_pg_data * pg_data) { + struct shdw_back_ptr * back_ptr = NULL; + struct shdw_back_ptr * tmp_ptr = NULL; + + PrintError("Unlinking gpa=%p, type=%d\n", (void *)pg_data->tuple.gpa, pg_data->tuple.pt_type); + + list_for_each_entry_safe(back_ptr, tmp_ptr, &(pg_data->back_ptrs), back_ptr_node) { + struct shdw_pg_data * parent = back_ptr->pg_data; + + evict_pt(parent->hva, back_ptr->gva, parent->tuple.pt_type); + list_del(&(back_ptr->back_ptr_node)); + V3_Free(back_ptr); + } + + + + return 0; +} + + +static int add_rmap(struct v3_vm_info * vm, struct shdw_pg_data * pg_data, addr_t gpa, addr_t gva) { + struct cache_vm_state * cache_state = vm->shdw_impl.impl_data; + struct list_head * rmap_list = NULL; + struct rmap_entry * entry = NULL; + + + rmap_list = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa); + + if (rmap_list == NULL) { + rmap_list = V3_Malloc(sizeof(struct list_head)); + INIT_LIST_HEAD(rmap_list); + + v3_htable_insert(cache_state->reverse_map, gpa, 
(addr_t)rmap_list); + } + + entry = V3_Malloc(sizeof(struct rmap_entry)); + + entry->gva = gva; + entry->gpa = pg_data->tuple.gpa; + entry->pt_type = pg_data->tuple.pt_type; + + list_add(&(entry->rmap_node), rmap_list); + + return 0; +} + + + +static int update_rmap_entries(struct v3_vm_info * vm, addr_t gpa) { + struct cache_vm_state * cache_state = vm->shdw_impl.impl_data; + struct list_head * rmap_list = NULL; + struct rmap_entry * entry = NULL; + int i = 0; + + rmap_list = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa); + + if (rmap_list == NULL) { + return 0; + } + + PrintError("Updating rmap entries\n\t"); + + list_for_each_entry(entry, rmap_list, rmap_node) { + struct shdw_pg_data * pg_data = NULL; + struct guest_pg_tuple tuple = {entry->gpa, entry->pt_type}; + + V3_Print("%d \n", i); + + pg_data = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&tuple); + + if (!pg_data) { + PrintError("Invalid PTE reference...\n"); + continue; + } + + if (grab_pt(pg_data->hva, entry->gva, entry->pt_type) == -1) { + PrintError("Could not invalidate reverse map entry\n"); + return -1; + } + + i++; + + } + + return 0; +} + + + + +static int link_shdw_pg(struct shdw_pg_data * child_pg, struct shdw_pg_data * parent_pg, addr_t gva) { + struct shdw_back_ptr * back_ptr = V3_Malloc(sizeof(struct shdw_back_ptr)); + memset(back_ptr, 0, sizeof(struct shdw_back_ptr)); + + back_ptr->pg_data = parent_pg; + back_ptr->gva = gva; + + list_add(&(back_ptr->back_ptr_node), &(child_pg->back_ptrs)); + + return 0; +} + + + +static struct shdw_pg_data * find_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) { + struct cache_vm_state * cache_state = vm->shdw_impl.impl_data; + struct shdw_pg_data * pg_data = NULL; + struct guest_pg_tuple tuple = {gpa, pt_type}; + + pg_data = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&tuple); + + if (pg_data != NULL) { + // move pg_data to head of queue, for LRU policy + list_move(&(pg_data->pg_queue_node), &(cache_state->pg_queue)); + } + + return pg_data; +} + + +static int evict_shdw_pg(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) { + struct cache_vm_state * cache_state = vm->shdw_impl.impl_data; + struct shdw_pg_data * pg_data = NULL; + + pg_data = find_shdw_pt(vm, gpa, pt_type); + + PrintError("Evicting GPA: %p, type=%d\n", (void *)gpa, pt_type); + + if (pg_data != NULL) { + if (unlink_shdw_pg(pg_data) == -1) { + PrintError("Error unlinking page...\n"); + return -1; + } + + v3_htable_remove(cache_state->page_htable, (addr_t)&(pg_data->tuple), 0); + + + // Move Page to free list + list_move(&(pg_data->pg_queue_node), &(cache_state->free_list)); + cache_state->pgs_in_free_list++; + cache_state->pgs_in_cache--; + } + + return 0; +} + + +static struct shdw_pg_data * pop_queue_pg(struct v3_vm_info * vm, + struct cache_vm_state * cache_state) { + struct shdw_pg_data * pg_data = NULL; + + pg_data = list_tail_entry(&(cache_state->pg_queue), struct shdw_pg_data, pg_queue_node); + + + if (unlink_shdw_pg(pg_data) == -1) { + PrintError("Error unlinking cached page\n"); + return NULL; + } + + v3_htable_remove(cache_state->page_htable, (addr_t)&(pg_data->tuple), 0); + list_del(&(pg_data->pg_queue_node)); + + cache_state->pgs_in_cache--; + + return pg_data; +} + +static struct shdw_pg_data * create_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) { + struct cache_vm_state * cache_state = vm->shdw_impl.impl_data; + struct shdw_pg_data * pg_data = NULL; + + + PrintError("Creating 
shdw page: gpa=%p, type=%d\n", (void *)gpa, pt_type); + + if (cache_state->pgs_in_cache < cache_state->max_cache_pgs) { + pg_data = V3_Malloc(sizeof(struct shdw_pg_data)); + + pg_data->hpa = (addr_t)V3_AllocPages(1); + pg_data->hva = (void *)V3_VAddr((void *)pg_data->hpa); + + } else if (cache_state->pgs_in_free_list) { + // pull from free list + pg_data = list_tail_entry(&(cache_state->free_list), struct shdw_pg_data, pg_queue_node); + + list_del(&(pg_data->pg_queue_node)); + cache_state->pgs_in_free_list--; + + } else { + // pull from queue + pg_data = pop_queue_pg(vm, cache_state); + } + + + if (pg_data == NULL) { + PrintError("Error creating Shadow Page table page\n"); + return NULL; + } + + memset(pg_data->hva, 0, PAGE_SIZE_4KB); + + pg_data->tuple.gpa = gpa; + pg_data->tuple.pt_type = pt_type; + + INIT_LIST_HEAD(&(pg_data->back_ptrs)); + + v3_htable_insert(cache_state->page_htable, (addr_t)&(pg_data->tuple), (addr_t)pg_data); + + list_add(&(pg_data->pg_queue_node), &(cache_state->pg_queue)); + cache_state->pgs_in_cache++; + + return pg_data; + +} + + +#include "vmm_shdw_pg_cache_32.h" +//#include "vmm_shdw_pg_cache_32pae.h" +//#include "vmm_shdw_pg_cache_64.h" + + +static uint_t cache_hash_fn(addr_t key) { + struct guest_pg_tuple * tuple = (struct guest_pg_tuple *)key; + + return v3_hash_buffer((uint8_t *)tuple, sizeof(struct guest_pg_tuple)); +} + +static int cache_eq_fn(addr_t key1, addr_t key2) { + struct guest_pg_tuple * tuple1 = (struct guest_pg_tuple *)key1; + struct guest_pg_tuple * tuple2 = (struct guest_pg_tuple *)key2; + + return ((tuple1->gpa == tuple2->gpa) && (tuple1->pt_type == tuple2->pt_type)); +} + +static uint_t rmap_hash_fn(addr_t key) { + return v3_hash_long(key, sizeof(addr_t) * 8); +} + +static int rmap_eq_fn(addr_t key1, addr_t key2) { + return (key1 == key2); +} + + +static int cache_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { + struct v3_shdw_impl_state * vm_state = &(vm->shdw_impl); + struct cache_vm_state * cache_state = NULL; + int cache_size = DEFAULT_CACHE_SIZE; + char * cache_sz_str = v3_cfg_val(cfg, "cache_size"); + + if (cache_sz_str != NULL) { + cache_size = ((atoi(cache_sz_str) * 1024 * 1024) / 4096); + } + + V3_Print("Shadow Page Cache initialization\n"); + + cache_state = V3_Malloc(sizeof(struct cache_vm_state)); + memset(cache_state, 0, sizeof(struct cache_vm_state)); + + cache_state->page_htable = v3_create_htable(0, cache_hash_fn, cache_eq_fn); + cache_state->reverse_map = v3_create_htable(0, rmap_hash_fn, rmap_eq_fn); + v3_lock_init(&(cache_state->cache_lock)); + INIT_LIST_HEAD(&(cache_state->pg_queue)); + INIT_LIST_HEAD(&(cache_state->free_list)); + cache_state->max_cache_pgs = cache_size; + + vm_state->impl_data = cache_state; + + return 0; +} + + +static int cache_deinit(struct v3_vm_info * vm) { + return -1; +} + + +static int cache_local_init(struct guest_info * core) { + // struct v3_shdw_pg_state * core_state = &(vm->shdw_pg_state); + + + return 0; +} + +static int cache_activate_shdw_pt(struct guest_info * core) { + switch (v3_get_vm_cpu_mode(core)) { + + case PROTECTED: + PrintError("Calling 32 bit cache activation\n"); + return activate_shadow_pt_32(core); + case PROTECTED_PAE: + // return activate_shadow_pt_32pae(core); + case LONG: + case LONG_32_COMPAT: + case LONG_16_COMPAT: + // return activate_shadow_pt_64(core); + default: + PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core))); + return -1; + } + + return 0; +} + +static int cache_invalidate_shdw_pt(struct guest_info * core) { + // wipe 
everything... + V3_Print("Cache invalidation called\n"); + + return cache_activate_shdw_pt(core); +} + + + +static int cache_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) { + + switch (v3_get_vm_cpu_mode(core)) { + case PROTECTED: + return handle_shadow_pagefault_32(core, fault_addr, error_code); + break; + case PROTECTED_PAE: + // return handle_shadow_pagefault_32pae(core, fault_addr, error_code); + case LONG: + case LONG_32_COMPAT: + case LONG_16_COMPAT: + // return handle_shadow_pagefault_64(core, fault_addr, error_code); + default: + PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core))); + return -1; + } +} + + +static int cache_handle_invlpg(struct guest_info * core, addr_t vaddr) { + PrintError("INVLPG called for %p\n", (void *)vaddr); + + switch (v3_get_vm_cpu_mode(core)) { + case PROTECTED: + return handle_shadow_invlpg_32(core, vaddr); + case PROTECTED_PAE: + // return handle_shadow_invlpg_32pae(core, vaddr); + case LONG: + case LONG_32_COMPAT: + case LONG_16_COMPAT: + // return handle_shadow_invlpg_64(core, vaddr); + default: + PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core))); + return -1; + } +} + + + + + + +static struct v3_shdw_pg_impl cache_impl = { + .name = "SHADOW_CACHE", + .init = cache_init, + .deinit = cache_deinit, + .local_init = cache_local_init, + .handle_pagefault = cache_handle_pf, + .handle_invlpg = cache_handle_invlpg, + .activate_shdw_pt = cache_activate_shdw_pt, + .invalidate_shdw_pt = cache_invalidate_shdw_pt +}; + + + +register_shdw_pg_impl(&cache_impl); diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h b/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h new file mode 100644 index 0000000..7d48f6b --- /dev/null +++ b/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h @@ -0,0 +1,618 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + + +static inline int activate_shadow_pt_32(struct guest_info * core) { + struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(core->ctrl_regs.cr3); + struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(core->shdw_pg_state.guest_cr3); + addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_cr3->pdt_base_addr); + struct shdw_pg_data * shdw_pg = NULL; + + PrintDebug("Activating 32 Bit cacheable page tables\n"); + shdw_pg = find_shdw_pt(core->vm_info, gpa, PAGE_PD32); + + PrintError("shdw_pg returned as %p\n", shdw_pg); + + if (shdw_pg == NULL) { + shdw_pg = create_shdw_pt(core->vm_info, gpa, PAGE_PD32); + } + + PrintDebug("shdw_pg now exists...\n"); + + shadow_cr3->pdt_base_addr = PAGE_BASE_ADDR_4KB(shdw_pg->hpa); + shadow_cr3->pwt = guest_cr3->pwt; + shadow_cr3->pcd = guest_cr3->pcd; + + return 0; +} + + + +/* + * * + * * + * * 32 bit Page table fault handlers + * * + * * + */ +/* +static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code, + pt_access_status_t shadow_pde_access, pde32_4MB_t * large_shadow_pde, + pde32_4MB_t * large_guest_pde); +*/ +static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, struct shdw_pg_data * pt_pg_data); + +static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data); + + + + +static inline int handle_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) { + pde32_t * guest_pd = NULL; + pde32_t * shadow_pd = CR3_TO_PDE32_VA(core->ctrl_regs.cr3); + addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3); + pt_access_status_t guest_pde_access; + pt_access_status_t shadow_pde_access; + pde32_t * guest_pde = NULL; + pde32_t * shadow_pde = (pde32_t *)&(shadow_pd[PDE32_INDEX(fault_addr)]); + + PrintDebug("Shadow cache page fault handler: %p\n", (void *)fault_addr ); + PrintDebug("Handling PDE32 Fault\n"); + + if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) { + PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3); + return -1; + } + + guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(fault_addr)]); + + // Check the guest page permissions + guest_pde_access = v3_can_access_pde32(guest_pd, fault_addr, error_code); + + // Check the shadow page permissions + shadow_pde_access = v3_can_access_pde32(shadow_pd, fault_addr, error_code); + + /* Was the page fault caused by the Guest's page tables? 
*/ + if (v3_is_guest_pf(guest_pde_access, shadow_pde_access) == 1) { + PrintDebug("Injecting PDE pf to guest: (guest access error=%d) (shdw access error=%d) (pf error code=%d)\n", + *(uint_t *)&guest_pde_access, *(uint_t *)&shadow_pde_access, *(uint_t *)&error_code); + if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) { + PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); + return -1; + } + return 0; + } + + + + if (shadow_pde_access == PT_ACCESS_USER_ERROR) { + // + // PDE Entry marked non user + // + PrintDebug("Shadow Paging User access error (shadow_pde_access=0x%x, guest_pde_access=0x%x)\n", + shadow_pde_access, guest_pde_access); + + if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) { + PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); + return -1; + } + return 0; + } else if ((shadow_pde_access == PT_ACCESS_WRITE_ERROR) && + (guest_pde->large_page == 1)) { + + ((pde32_4MB_t *)guest_pde)->dirty = 1; + shadow_pde->writable = guest_pde->writable; + return 0; + } else if ((shadow_pde_access != PT_ACCESS_NOT_PRESENT) && + (shadow_pde_access != PT_ACCESS_OK)) { + // inject page fault in guest + if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) { + PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); + return -1; + } + PrintDebug("Unknown Error occurred (shadow_pde_access=%d)\n", shadow_pde_access); + PrintDebug("Manual Says to inject page fault into guest\n"); + return 0; + } + + + pte32_t * shadow_pt = NULL; + pte32_t * guest_pt = NULL; + + + /* Set up cache state */ + addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_pde->pt_base_addr); + + + struct shdw_pg_data * shdw_page = NULL; + page_type_t pt_type = PAGE_PT32; + + if (guest_pde->large_page == 1) { + // Handle Large pages, for this we use the PAGE_4MB type... + pt_type = PAGE_4MB; + } + + shdw_page = find_shdw_pt(core->vm_info, gpa, pt_type); + + if (shdw_page == NULL) { + shdw_page = create_shdw_pt(core->vm_info, gpa, pt_type); + } + + // update current reverse map entries... + // We are now using this page in a PT, so: + // any existing writable mappings must be updated + update_rmap_entries(core->vm_info, gpa); + + struct shdw_pg_data * parent_page = find_shdw_pt(core->vm_info, guest_cr3, PAGE_PD32); + + if (parent_page != NULL) { + // add back pointer to PDE, if it exists + link_shdw_pg(shdw_page, parent_page, PAGE_ADDR_4KB(fault_addr)); + } + + + // Get the next shadow page level, allocate if not present + + if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) { + + /* Currently we do not support large pages + This requires us to scan the large page for Page table pages, and split the entries if they exist. + Its easier to just ignore this for now... 
+ + if ((core->use_large_pages == 1) && (guest_pde->large_page == 1)) { + // Check underlying physical memory map to see if a large page is viable + addr_t gpa_4MB = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr); + uint32_t page_size = v3_get_max_page_size(core, gpa_4MB, PROTECTED); + + if (page_size == PAGE_SIZE_4MB) { + PrintDebug("using large page for fault_addr %p (gpa=%p)\n", (void *)fault_addr, (void *)gpa_4MB); + if (handle_4MB_shadow_pagefault_pde_32(core, fault_addr, error_code, shadow_pde_access, + (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == -1) { + PrintError("Error handling large pagefault with large page\n"); + return -1; + } + + return 0; + } + } + */ + + + + + shadow_pt = (pte32_t *)(shdw_page->hva); + + + + shadow_pde->present = 1; + shadow_pde->user_page = guest_pde->user_page; + + + if (guest_pde->large_page == 0) { + shadow_pde->writable = guest_pde->writable; + } else { + // This large page flag is temporary until we can get a working cache.... + ((pde32_4MB_t *)guest_pde)->vmm_info = V3_LARGE_PG; + + if (error_code.write) { + shadow_pde->writable = guest_pde->writable; + ((pde32_4MB_t *)guest_pde)->dirty = 1; + } else { + shadow_pde->writable = 0; + ((pde32_4MB_t *)guest_pde)->dirty = 0; + } + } + + // VMM Specific options + shadow_pde->write_through = guest_pde->write_through; + shadow_pde->cache_disable = guest_pde->cache_disable; + shadow_pde->global_page = guest_pde->global_page; + // + + guest_pde->accessed = 1; + + shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->hpa); + } else { + shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr)); + } + + + if (guest_pde->large_page == 0) { + if (v3_gpa_to_hva(core, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t*)&guest_pt) == -1) { + // Machine check the guest + PrintDebug("Invalid Guest PTE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr)); + v3_raise_exception(core, MC_EXCEPTION); + return 0; + } + + if (handle_pte_shadow_pagefault_32(core, fault_addr, error_code, shadow_pt, guest_pt, shdw_page) == -1) { + PrintError("Error handling Page fault caused by PTE\n"); + return -1; + } + } else { + if (handle_4MB_shadow_pagefault_pte_32(core, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde, shdw_page) == -1) { + PrintError("Error handling large pagefault\n"); + return -1; + } + } + + return 0; +} + + + + +static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data) { + + pt_access_status_t guest_pte_access; + pt_access_status_t shadow_pte_access; + pte32_t * guest_pte = (pte32_t *)&(guest_pt[PTE32_INDEX(fault_addr)]);; + pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]); + addr_t guest_pa = BASE_TO_PAGE_ADDR((addr_t)(guest_pte->page_base_addr)) + PAGE_OFFSET(fault_addr); + + struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_pa); + + if (shdw_reg == NULL) { + // Inject a machine check in the guest + PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_pa); + v3_raise_exception(core, MC_EXCEPTION); + return 0; + } + + // Check the guest page permissions + guest_pte_access = v3_can_access_pte32(guest_pt, fault_addr, error_code); + + // Check the shadow page permissions + shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code); + + + /* Was the page fault caused by the Guest's page tables? 
*/ + if (v3_is_guest_pf(guest_pte_access, shadow_pte_access) == 1) { + + PrintDebug("Access error injecting pf to guest (guest access error=%d) (pf error code=%d)\n", + guest_pte_access, *(uint_t*)&error_code); + + + // inject: + if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) { + PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); + return -1; + } + + return 0; + } + + + + if (shadow_pte_access == PT_ACCESS_OK) { + // Inconsistent state... + // Guest Re-Entry will flush page tables and everything should now work + PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n"); + return 0; + } + + + if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) { + // Page Table Entry Not Present + PrintDebug("guest_pa =%p\n", (void *)guest_pa); + + if ((shdw_reg->flags.alloced == 1) && (shdw_reg->flags.read == 1)) { + addr_t shadow_pa = 0; + + if (v3_gpa_to_hpa(core, guest_pa, &shadow_pa) == -1) { + PrintError("could not translate page fault address (%p)\n", (void *)guest_pa); + return -1; + } + + shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa); + + PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr)); + + shadow_pte->present = guest_pte->present; + shadow_pte->user_page = guest_pte->user_page; + + //set according to VMM policy + shadow_pte->write_through = guest_pte->write_through; + shadow_pte->cache_disable = guest_pte->cache_disable; + shadow_pte->global_page = guest_pte->global_page; + // + + guest_pte->accessed = 1; + + if (guest_pte->dirty == 1) { + shadow_pte->writable = guest_pte->writable; + } else if ((guest_pte->dirty == 0) && (error_code.write == 1)) { + shadow_pte->writable = guest_pte->writable; + guest_pte->dirty = 1; + } else if ((guest_pte->dirty == 0) && (error_code.write == 0)) { + shadow_pte->writable = 0; + } + + + if (shdw_reg->flags.write == 0) { + shadow_pte->writable = 0; + } + + + // Add this PTE to the reverse map... + // This allows us to update this PTE entry if it gets turned into a PT page + add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_pa), PAGE_ADDR_4KB(fault_addr)); + + // Check for cache entries and mark page read-only, plus tag + { + struct shdw_pg_data * pt_page = NULL; + addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa); + + pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32); + + if (pt_page == NULL) { + pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32); + } + + if (pt_page != NULL) { + // This is a page table page... + shadow_pte->writable = 0; + shadow_pte->vmm_info = V3_CACHED_PG; + } + } + + } else { + // Page fault on unhandled memory region + + if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) { + PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr); + return -1; + } + } + } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) { + guest_pte->dirty = 1; + + // check for cache tag and handle invalidations if it exists. 
+ if (shadow_pte->vmm_info == V3_CACHED_PG) { + addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa); + + PrintError("Evicting on a small page\n"); + + + if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) { + PrintError("Error Evicting PAGE_PD32 cache entry\n"); + return -1; + } + + if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) { + PrintError("Error Evicting PAGE_PT32 cache entry\n"); + return -1; + } + + shadow_pte->vmm_info &= ~V3_CACHED_PG; + } + + + if (shdw_reg->flags.write == 1) { + PrintDebug("Shadow PTE Write Error\n"); + shadow_pte->writable = guest_pte->writable; + } else { + if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) { + PrintError("Special Page fault handler returned error for address: %p\n", (void *)fault_addr); + return -1; + } + } + + + return 0; + + } else { + // Inject page fault into the guest + if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) { + PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); + return -1; + } + + PrintError("PTE Page fault fell through... Not sure if this should ever happen\n"); + PrintError("Manual Says to inject page fault into guest\n"); + return -1; + } + + return 0; +} + +// Handle a 4MB page fault with small pages in the PTE +static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, + addr_t fault_addr, pf_error_t error_code, + pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, + struct shdw_pg_data * pt_pg_data) +{ + pt_access_status_t shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code); + pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]); + addr_t guest_fault_pa = BASE_TO_PAGE_ADDR_4MB(large_guest_pde->page_base_addr) + PAGE_OFFSET_4MB(fault_addr); + + + PrintDebug("Handling 4MB fault (guest_fault_pa=%p) (error_code=%x)\n", (void *)guest_fault_pa, *(uint_t*)&error_code); + PrintDebug("ShadowPT=%p, LargeGuestPDE=%p\n", shadow_pt, large_guest_pde); + + struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_fault_pa); + + + if (shdw_reg == NULL) { + // Inject a machine check in the guest + PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa); + v3_raise_exception(core, MC_EXCEPTION); + return -1; + } + + if (shadow_pte_access == PT_ACCESS_OK) { + // Inconsistent state... + // Guest Re-Entry will flush tables and everything should now work + PrintDebug("Inconsistent state... 
Guest re-entry should flush tlb\n"); + return 0; + } + + + if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) { + // Get the guest physical address of the fault + + if ((shdw_reg->flags.alloced == 1) && + (shdw_reg->flags.read == 1)) { + addr_t shadow_pa = 0; + + + if (v3_gpa_to_hpa(core, guest_fault_pa, &shadow_pa) == -1) { + PrintError("could not translate page fault address (%p)\n", (void *)guest_fault_pa); + return -1; + } + + shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa); + + PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr)); + + shadow_pte->present = 1; + + /* We are assuming that the PDE entry has precedence + * so the Shadow PDE will mirror the guest PDE settings, + * and we don't have to worry about them here + * Allow everything + */ + shadow_pte->user_page = 1; + + //set according to VMM policy + shadow_pte->write_through = large_guest_pde->write_through; + shadow_pte->cache_disable = large_guest_pde->cache_disable; + shadow_pte->global_page = large_guest_pde->global_page; + // + + + if (shdw_reg->flags.write == 0) { + shadow_pte->writable = 0; + } else { + shadow_pte->writable = 1; + } + + + // Add this PTE to the reverse map... + // This allows us to update this PTE entry if it gets turned into a PT page sometime in the future + add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_fault_pa), PAGE_ADDR_4KB(fault_addr)); + + // Check for cache entries and mark page read-only, plus tag + { + struct shdw_pg_data * pt_page = NULL; + addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa); + + pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32); + + if (pt_page == NULL) { + pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32); + } + + if (pt_page != NULL) { + // This is a page table page... + shadow_pte->writable = 0; + shadow_pte->vmm_info = V3_CACHED_PG; + } + + } + + } else { + if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) { + PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr); + return -1; + } + } + } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) { + + // check for cache tag and handle invalidations if it exists. + if (shadow_pte->vmm_info == V3_CACHED_PG) { + addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa); + PrintError("Evicting on a large page\n"); + + if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) { + PrintError("Error Evicting PAGE_PD32 cache entry\n"); + return -1; + } + + if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) { + PrintError("Error Evicting PAGE_PT32 cache entry\n"); + return -1; + } + + shadow_pte->vmm_info &= ~V3_CACHED_PG; + } + + + if (shdw_reg->flags.write == 0) { + if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) { + PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr); + return -1; + } + } else { + // set writable after cache eviction, unless overruled by region setting + shadow_pte->writable = 1; + } + + } else { + PrintError("Error in large page fault handler...\n"); + PrintError("This case should have been handled at the top level handler\n"); + return -1; + } + + PrintDebug("Returning from large page->small page fault handler\n"); + return 0; +} + + +/* If we start to optimize we should look up the guest pages in the cache... 
*/ +static inline int handle_shadow_invlpg_32(struct guest_info * core, addr_t vaddr) { + pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(core->ctrl_regs.cr3); + pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(vaddr)]; + + addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3); + pde32_t * guest_pd = NULL; + pde32_t * guest_pde; + + if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) { + PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3); + return -1; + } + + guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(vaddr)]); + + + if (guest_pde->large_page == 1) { + shadow_pde->present = 0; + PrintError("\tInvalidating Large Page (gpa=%p)\n", (void *)BASE_TO_PAGE_ADDR_4MB(guest_pde->pt_base_addr)); + } else if (shadow_pde->present == 1) { + pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR_4KB(shadow_pde->pt_base_addr); + pte32_t * shadow_pte = (pte32_t *) V3_VAddr( (void*) &shadow_pt[PTE32_INDEX(vaddr)] ); + + + + PrintError("\tInvalidating small page\n"); + + + shadow_pte->present = 0; + } else { + + PrintError("Unexpected shadow PDE state during INVLPG\n"); + } + return 0; +} + + diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c b/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c index af23fed..e8c7209 100644 --- a/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c +++ b/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c @@ -205,7 +205,7 @@ static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte); if (shdw_ptr_list == NULL) { - shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head *)); + shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head)); #ifdef CONFIG_SWAPBYPASS_TELEMETRY swap_state->list_size++; #endif diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index 6ccf509..738d2ee 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -327,11 +327,12 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c return -1; } - if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) { - info->use_large_pages = 1; - PrintDebug("Use of large pages in memory virtualization enabled.\n"); + if (v3_cfg_val(pg_tree, "large_pages") != NULL) { + if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) { + info->use_large_pages = 1; + PrintDebug("Use of large pages in memory virtualization enabled.\n"); + } } - return 0; }
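
For readers of this patch: the cache introduced in vmm_shdw_pg_cache.c keys shadow page-table pages by the (guest physical address, page-table type) tuple via v3_htable, keeps them on an LRU-ordered pg_queue (list_move on each hit in find_shdw_pt), and recycles the queue tail once max_cache_pgs is reached, preferring the free list first. The following is a minimal, self-contained sketch of that lookup/eviction policy only, not the Palacios API: the names (pg_entry, find_pg, create_pg, MAX_CACHE_PGS, lru_clock) are invented for illustration, the hashtable is replaced by a linear scan, and the back-pointer and reverse-map machinery is omitted.

/* Illustrative sketch only -- simplified model of the (gpa, pt_type) keyed,
 * LRU-ordered shadow page cache; the real code uses v3_htable plus
 * kernel-style list_heads and also maintains back pointers / rmaps. */
#include <stdio.h>
#include <stdint.h>

#define MAX_CACHE_PGS 4   /* stands in for max_cache_pgs */

typedef enum { PT_PD32, PT_PT32 } pt_type_t;

struct pg_entry {
    int       in_use;
    uint64_t  gpa;        /* guest physical address of the guest page table */
    pt_type_t type;       /* which page-table level this entry shadows */
    uint64_t  lru_stamp;  /* larger == more recently used */
};

static struct pg_entry cache[MAX_CACHE_PGS];
static uint64_t lru_clock = 0;

/* Look up by (gpa, type); on a hit, refresh its LRU position
 * (the real code does a list_move() to the head of pg_queue). */
static struct pg_entry * find_pg(uint64_t gpa, pt_type_t type) {
    for (int i = 0; i < MAX_CACHE_PGS; i++) {
        if (cache[i].in_use && cache[i].gpa == gpa && cache[i].type == type) {
            cache[i].lru_stamp = ++lru_clock;
            return &cache[i];
        }
    }
    return NULL;
}

/* Allocate a slot: prefer a free one, otherwise recycle the least recently
 * used entry (the real code also unlinks back pointers and the htable key). */
static struct pg_entry * create_pg(uint64_t gpa, pt_type_t type) {
    struct pg_entry * victim = &cache[0];
    for (int i = 0; i < MAX_CACHE_PGS; i++) {
        if (!cache[i].in_use) { victim = &cache[i]; break; }
        if (cache[i].lru_stamp < victim->lru_stamp) victim = &cache[i];
    }
    if (victim->in_use)
        printf("evicting gpa=%#llx\n", (unsigned long long)victim->gpa);
    victim->in_use = 1;
    victim->gpa = gpa;
    victim->type = type;
    victim->lru_stamp = ++lru_clock;
    return victim;
}

int main(void) {
    /* Touch five distinct guest page tables with a 4-entry cache:
     * the least recently used entry is recycled for the fifth. */
    for (uint64_t gpa = 0x1000; gpa <= 0x5000; gpa += 0x1000) {
        if (!find_pg(gpa, PT_PT32))
            create_pg(gpa, PT_PT32);
    }
    return 0;
}

The design point this models is why the patch write-protects guest page-table pages (grab_pt) and tags their mappings with V3_CACHED_PG: a cached shadow table is only reusable as long as the guest table it mirrors is unchanged, so any guest write must evict or update the cached copy before the write is allowed to proceed.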