Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This clones the master branch. You will most likely want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out in the same way.
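For example, to see which branches the repository publishes and then track one of them, the usual git commands apply (the branch name Release-1.2 below is only an illustration; use whatever "git branch -r" reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2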


initial shadow page cache version
Jack Lange [Wed, 20 Oct 2010 19:23:20 +0000 (14:23 -0500)]
Kconfig
palacios/src/palacios/mmu/Makefile
palacios/src/palacios/mmu/vmm_shdw_pg_cache.c [new file with mode: 0644]
palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h [new file with mode: 0644]
palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c
palacios/src/palacios/vmm_config.c
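
To examine this change locally after cloning, the history can be filtered by the new file's path; the commands below are illustrative, and <commit-id> is a placeholder for the id reported by git log:

  git log --oneline -- palacios/src/palacios/mmu/vmm_shdw_pg_cache.c
  git show <commit-id>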

diff --git a/Kconfig b/Kconfig
index c4aacfb..9d3cbb9 100644
--- a/Kconfig
+++ b/Kconfig
@@ -213,6 +213,14 @@ config DEBUG_SHDW_PG_VTLB
        help
          Enables debugging messages for VTLB implementation
 
+config SHADOW_PAGING_CACHE1
+       bool "Shadow Page Cache (1)"
+       default y
+       depends on SHADOW_PAGING
+       help 
+          Enables caching implemenation for shadow paging
+
+
 endmenu
 
 
diff --git a/palacios/src/palacios/mmu/Makefile b/palacios/src/palacios/mmu/Makefile
index 5d92236..0089d17 100644
--- a/palacios/src/palacios/mmu/Makefile
+++ b/palacios/src/palacios/mmu/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_SHADOW_PAGING_VTLB) += vmm_shdw_pg_tlb.o
 obj-$(CONFIG_SWAPBYPASS) += vmm_shdw_pg_swapbypass.o
+obj-$(CONFIG_SHADOW_PAGING_CACHE1) += vmm_shdw_pg_cache.o
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c b/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c
new file mode 100644
index 0000000..3f1f0ca
--- /dev/null
+++ b/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c
@@ -0,0 +1,559 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_shadow_paging.h>
+#include <palacios/vmm_swapbypass.h>
+#include <palacios/vmm_ctrl_regs.h>
+
+#include <palacios/vm_guest.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vmm_hashtable.h>
+#include <palacios/vmm_list.h>
+
+#define DEFAULT_CACHE_SIZE ((32 * 1024 * 1024) / 4096)
+
+#define V3_CACHED_PG 0x1
+
+#ifndef CONFIG_DEBUG_SHDW_PG_CACHE
+#undef PrintDebug
+#define PrintDebug(fmt, ...)
+#endif
+
+
+struct shdw_back_ptr {
+    addr_t gva;
+    struct shdw_pg_data * pg_data;
+    struct list_head back_ptr_node;
+};
+
+struct guest_pg_tuple {
+    addr_t gpa;
+    page_type_t pt_type;    
+} __attribute__((packed));
+
+
+
+struct rmap_entry {
+    addr_t gva;
+    addr_t gpa;
+    page_type_t pt_type;
+    struct list_head rmap_node;
+};
+
+struct shdw_pg_data {
+    struct guest_pg_tuple tuple;
+
+    addr_t hpa;
+    void * hva;
+
+    struct list_head back_ptrs;
+    struct list_head pg_queue_node;
+
+};
+
+
+
+struct cache_core_state {
+
+
+};
+
+
+struct cache_vm_state {
+    
+    v3_lock_t cache_lock;
+
+    struct hashtable * page_htable; // GPA to shdw_pg_data
+    struct hashtable * reverse_map;
+
+
+    int max_cache_pgs;
+    int pgs_in_cache;
+
+    struct list_head pg_queue;
+
+    int pgs_in_free_list;
+    struct list_head free_list;
+};
+
+
+
+static  inline int evict_pt(void * pt, addr_t va, page_type_t pt_type) {
+    
+    switch (pt_type) {
+       case PAGE_PD32: {
+           pde32_t * pde = pt;
+           pde[PDE32_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_4MB: {
+           pde32_4MB_t * pde = pt;
+           pde[PDE32_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_PT32: {
+           pte32_t * pte = pt;
+           pte[PTE32_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_PML464: {
+           pml4e64_t * pml = pt;
+           pml[PML4E64_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_PDP64: {
+           pdpe64_t * pdp = pt;
+           pdp[PDPE64_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_PD64: {
+           pde64_t * pde = pt;
+           pde[PDE64_INDEX(va)].writable = 1;
+           break;
+       }
+       case PAGE_PT64: {
+           pte64_t * pte = pt;
+           pte[PTE64_INDEX(va)].writable = 1;
+           break;
+       }
+       default:
+           PrintError("Invalid page type: %d\n", pt_type);
+           return -1;
+    }
+
+    return 0;
+}
+
+
+
+static  inline int grab_pt(void * pt, addr_t va, page_type_t pt_type) {
+    
+    switch (pt_type) {
+       case PAGE_PD32: {
+           pde32_t * pde = pt;
+           pde[PDE32_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_4MB: {
+           pde32_4MB_t * pde = pt;
+           pde[PDE32_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_PT32: {
+           pte32_t * pte = pt;
+           pte[PTE32_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_PML464: {
+           pml4e64_t * pml = pt;
+           pml[PML4E64_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_PDP64: {
+           pdpe64_t * pdp = pt;
+           pdp[PDPE64_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_PD64: {
+           pde64_t * pde = pt;
+           pde[PDE64_INDEX(va)].writable = 0;
+           break;
+       }
+       case PAGE_PT64: {
+           pte64_t * pte = pt;
+           pte[PTE64_INDEX(va)].writable = 0;
+           break;
+       }
+       default:
+           PrintError("Invalid page type: %d\n", pt_type);
+           return -1;
+    }
+
+    return 0;
+}
+
+
+static int unlink_shdw_pg(struct shdw_pg_data * pg_data) {
+    struct shdw_back_ptr * back_ptr = NULL;
+    struct shdw_back_ptr * tmp_ptr = NULL;
+
+    PrintError("Unlinking gpa=%p, type=%d\n", (void *)pg_data->tuple.gpa, pg_data->tuple.pt_type);
+
+    list_for_each_entry_safe(back_ptr, tmp_ptr, &(pg_data->back_ptrs), back_ptr_node) {
+       struct shdw_pg_data * parent = back_ptr->pg_data;
+       
+       evict_pt(parent->hva, back_ptr->gva, parent->tuple.pt_type);
+       list_del(&(back_ptr->back_ptr_node));
+       V3_Free(back_ptr);
+    }
+    
+
+
+    return 0;
+}
+
+
+static int add_rmap(struct v3_vm_info * vm, struct shdw_pg_data * pg_data, addr_t gpa, addr_t gva) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct list_head * rmap_list = NULL;
+    struct rmap_entry * entry = NULL;
+
+
+    rmap_list = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa);
+
+    if (rmap_list == NULL) {
+       rmap_list = V3_Malloc(sizeof(struct list_head));
+       INIT_LIST_HEAD(rmap_list);
+
+       v3_htable_insert(cache_state->reverse_map, gpa, (addr_t)rmap_list);
+    }
+    
+    entry = V3_Malloc(sizeof(struct rmap_entry));
+
+    entry->gva = gva;
+    entry->gpa = pg_data->tuple.gpa;
+    entry->pt_type = pg_data->tuple.pt_type;
+
+    list_add(&(entry->rmap_node), rmap_list);
+
+    return 0;
+}
+
+
+
+static int update_rmap_entries(struct v3_vm_info * vm, addr_t gpa) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct list_head * rmap_list = NULL;
+    struct rmap_entry * entry = NULL;
+    int i = 0;
+
+    rmap_list = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa);
+
+    if (rmap_list == NULL) {
+       return 0;
+    }
+
+    PrintError("Updating rmap entries\n\t");
+
+    list_for_each_entry(entry, rmap_list, rmap_node) {
+       struct shdw_pg_data * pg_data = NULL;
+       struct guest_pg_tuple tuple = {entry->gpa, entry->pt_type};
+
+       V3_Print("%d \n", i);
+
+       pg_data = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&tuple);
+
+       if (!pg_data) {
+           PrintError("Invalid PTE reference...\n");
+           continue;
+       }
+
+       if (grab_pt(pg_data->hva, entry->gva, entry->pt_type) == -1) {
+           PrintError("Could not invalidate reverse map entry\n");
+           return -1;
+       }
+
+       i++;
+       
+    }
+
+    return 0;
+}
+
+
+
+
+static int link_shdw_pg(struct shdw_pg_data * child_pg, struct shdw_pg_data * parent_pg, addr_t gva) {
+    struct shdw_back_ptr * back_ptr = V3_Malloc(sizeof(struct shdw_back_ptr));
+    memset(back_ptr, 0, sizeof(struct shdw_back_ptr));
+
+    back_ptr->pg_data = parent_pg;
+    back_ptr->gva = gva;
+
+    list_add(&(back_ptr->back_ptr_node), &(child_pg->back_ptrs));
+   
+    return 0;
+}
+
+
+
+static struct shdw_pg_data * find_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct shdw_pg_data * pg_data = NULL;
+    struct guest_pg_tuple tuple = {gpa, pt_type};
+    
+    pg_data = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&tuple);
+
+    if (pg_data != NULL) {
+       // move pg_data to head of queue, for LRU policy
+       list_move(&(pg_data->pg_queue_node), &(cache_state->pg_queue));
+    }
+
+    return pg_data;
+}
+
+
+static int evict_shdw_pg(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct shdw_pg_data * pg_data = NULL;
+
+    pg_data = find_shdw_pt(vm, gpa, pt_type);
+
+    PrintError("Evicting GPA: %p, type=%d\n", (void *)gpa, pt_type);
+
+    if (pg_data != NULL) {
+       if (unlink_shdw_pg(pg_data) == -1) {
+           PrintError("Error unlinking page...\n");
+           return -1;
+       }
+       
+       v3_htable_remove(cache_state->page_htable, (addr_t)&(pg_data->tuple), 0);
+       
+
+       // Move Page to free list
+       list_move(&(pg_data->pg_queue_node), &(cache_state->free_list));
+       cache_state->pgs_in_free_list++;
+       cache_state->pgs_in_cache--;
+    }
+
+    return 0;
+}
+
+
+static struct shdw_pg_data * pop_queue_pg(struct v3_vm_info * vm, 
+                                         struct cache_vm_state * cache_state) {
+    struct shdw_pg_data * pg_data = NULL;
+
+    pg_data = list_tail_entry(&(cache_state->pg_queue), struct shdw_pg_data, pg_queue_node);
+
+
+    if (unlink_shdw_pg(pg_data) == -1) {
+       PrintError("Error unlinking cached page\n");
+       return NULL;
+    }
+
+    v3_htable_remove(cache_state->page_htable, (addr_t)&(pg_data->tuple), 0);
+    list_del(&(pg_data->pg_queue_node));
+    
+    cache_state->pgs_in_cache--;
+
+    return pg_data;
+}
+
+static struct shdw_pg_data * create_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct shdw_pg_data * pg_data = NULL;
+
+
+    PrintError("Creating shdw page: gpa=%p, type=%d\n", (void *)gpa, pt_type);
+
+    if (cache_state->pgs_in_cache < cache_state->max_cache_pgs) {
+       pg_data = V3_Malloc(sizeof(struct shdw_pg_data));
+
+       pg_data->hpa = (addr_t)V3_AllocPages(1);
+       pg_data->hva = (void *)V3_VAddr((void *)pg_data->hpa);
+
+    } else if (cache_state->pgs_in_free_list) {
+       // pull from free list
+       pg_data = list_tail_entry(&(cache_state->free_list), struct shdw_pg_data, pg_queue_node);
+       
+       list_del(&(pg_data->pg_queue_node));
+       cache_state->pgs_in_free_list--;
+
+    } else {
+       // pull from queue
+       pg_data = pop_queue_pg(vm, cache_state);
+    }
+
+
+    if (pg_data == NULL) {
+       PrintError("Error creating Shadow Page table page\n");
+       return NULL;
+    }
+
+    memset(pg_data->hva, 0, PAGE_SIZE_4KB);
+
+    pg_data->tuple.gpa = gpa;
+    pg_data->tuple.pt_type = pt_type;
+
+    INIT_LIST_HEAD(&(pg_data->back_ptrs));
+
+    v3_htable_insert(cache_state->page_htable, (addr_t)&(pg_data->tuple), (addr_t)pg_data);
+
+    list_add(&(pg_data->pg_queue_node), &(cache_state->pg_queue));
+    cache_state->pgs_in_cache++;
+
+    return pg_data;
+
+}
+
+
+#include "vmm_shdw_pg_cache_32.h"
+//#include "vmm_shdw_pg_cache_32pae.h"
+//#include "vmm_shdw_pg_cache_64.h"
+
+
+static uint_t cache_hash_fn(addr_t key) {
+    struct guest_pg_tuple * tuple = (struct guest_pg_tuple *)key;
+
+    return v3_hash_buffer((uint8_t *)tuple, sizeof(struct guest_pg_tuple));
+}
+
+static int cache_eq_fn(addr_t key1, addr_t key2) {
+    struct guest_pg_tuple * tuple1 = (struct guest_pg_tuple *)key1;
+    struct guest_pg_tuple * tuple2 = (struct guest_pg_tuple *)key2;
+       
+    return ((tuple1->gpa == tuple2->gpa) && (tuple1->pt_type == tuple2->pt_type));
+}
+
+static uint_t rmap_hash_fn(addr_t key) {
+    return v3_hash_long(key, sizeof(addr_t) * 8);
+}
+
+static int rmap_eq_fn(addr_t key1, addr_t key2) {
+    return (key1 == key2);
+}
+
+
+static int cache_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
+    struct v3_shdw_impl_state * vm_state = &(vm->shdw_impl);
+    struct cache_vm_state * cache_state = NULL;
+    int cache_size = DEFAULT_CACHE_SIZE;
+    char * cache_sz_str = v3_cfg_val(cfg, "cache_size");
+
+    if (cache_sz_str != NULL) {
+       cache_size = ((atoi(cache_sz_str) * 1024 * 1024) / 4096);
+    }
+
+    V3_Print("Shadow Page Cache initialization\n");
+
+    cache_state = V3_Malloc(sizeof(struct cache_vm_state));
+    memset(cache_state, 0, sizeof(struct cache_vm_state));
+
+    cache_state->page_htable = v3_create_htable(0, cache_hash_fn, cache_eq_fn);
+    cache_state->reverse_map = v3_create_htable(0, rmap_hash_fn, rmap_eq_fn);
+    v3_lock_init(&(cache_state->cache_lock));
+    INIT_LIST_HEAD(&(cache_state->pg_queue));
+    INIT_LIST_HEAD(&(cache_state->free_list));
+    cache_state->max_cache_pgs = cache_size;
+
+    vm_state->impl_data = cache_state;
+
+    return 0;
+}
+
+
+static int cache_deinit(struct v3_vm_info * vm) {
+    return -1;
+}
+
+
+static int cache_local_init(struct guest_info * core) {
+    //    struct v3_shdw_pg_state * core_state = &(vm->shdw_pg_state);
+
+
+    return 0;
+}
+
+static int cache_activate_shdw_pt(struct guest_info * core) {
+    switch (v3_get_vm_cpu_mode(core)) {
+
+       case PROTECTED:
+           PrintError("Calling 32 bit cache activation\n");
+           return activate_shadow_pt_32(core);
+       case PROTECTED_PAE:
+           //      return activate_shadow_pt_32pae(core);
+       case LONG:
+       case LONG_32_COMPAT:
+       case LONG_16_COMPAT:
+           //      return activate_shadow_pt_64(core);
+       default:
+           PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+           return -1;
+    }
+
+    return 0;
+}
+
+static int cache_invalidate_shdw_pt(struct guest_info * core) {
+    // wipe everything...
+    V3_Print("Cache invalidation called\n");
+    
+    return cache_activate_shdw_pt(core);
+}
+
+
+
+static int cache_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+
+       switch (v3_get_vm_cpu_mode(core)) {
+           case PROTECTED:
+               return handle_shadow_pagefault_32(core, fault_addr, error_code);
+               break;
+           case PROTECTED_PAE:
+               //      return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
+           case LONG:
+           case LONG_32_COMPAT:
+           case LONG_16_COMPAT:
+               //      return handle_shadow_pagefault_64(core, fault_addr, error_code);
+           default:
+               PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+               return -1;
+       }
+}
+
+
+static int cache_handle_invlpg(struct guest_info * core, addr_t vaddr) {
+    PrintError("INVLPG called for %p\n", (void *)vaddr);
+
+    switch (v3_get_vm_cpu_mode(core)) {
+       case PROTECTED:
+           return handle_shadow_invlpg_32(core, vaddr);
+       case PROTECTED_PAE:
+           //    return handle_shadow_invlpg_32pae(core, vaddr);
+       case LONG:
+       case LONG_32_COMPAT:
+       case LONG_16_COMPAT:
+           //    return handle_shadow_invlpg_64(core, vaddr);
+       default:
+           PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+           return -1;
+    }
+}
+
+
+
+
+
+
+static struct v3_shdw_pg_impl cache_impl = {
+    .name = "SHADOW_CACHE",
+    .init = cache_init, 
+    .deinit = cache_deinit, 
+    .local_init = cache_local_init, 
+    .handle_pagefault = cache_handle_pf, 
+    .handle_invlpg = cache_handle_invlpg,
+    .activate_shdw_pt = cache_activate_shdw_pt, 
+    .invalidate_shdw_pt = cache_invalidate_shdw_pt
+};
+
+
+
+register_shdw_pg_impl(&cache_impl);
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h b/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h
new file mode 100644
index 0000000..7d48f6b
--- /dev/null
+++ b/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h
@@ -0,0 +1,618 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+static inline int activate_shadow_pt_32(struct guest_info * core) {
+    struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(core->ctrl_regs.cr3);
+    struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(core->shdw_pg_state.guest_cr3);
+    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_cr3->pdt_base_addr);
+    struct shdw_pg_data * shdw_pg = NULL;
+
+    PrintDebug("Activating 32 Bit cacheable page tables\n");
+    shdw_pg = find_shdw_pt(core->vm_info, gpa, PAGE_PD32);
+    
+    PrintError("shdw_pg returned as %p\n", shdw_pg);
+
+    if (shdw_pg == NULL) {
+       shdw_pg = create_shdw_pt(core->vm_info, gpa, PAGE_PD32);
+    }
+
+    PrintDebug("shdw_pg now exists...\n");
+
+    shadow_cr3->pdt_base_addr = PAGE_BASE_ADDR_4KB(shdw_pg->hpa);
+    shadow_cr3->pwt = guest_cr3->pwt;
+    shadow_cr3->pcd = guest_cr3->pcd;
+
+    return 0;
+}
+
+
+
+/* 
+ * *
+ * * 
+ * * 32 bit Page table fault handlers
+ * *
+ * *
+ */
+/*
+static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * core,  addr_t fault_addr, pf_error_t error_code, 
+                                             pt_access_status_t shadow_pde_access, pde32_4MB_t * large_shadow_pde, 
+                                             pde32_4MB_t * large_guest_pde);
+*/
+static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core,  addr_t fault_addr, pf_error_t error_code, 
+                                             pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde,  struct shdw_pg_data * pt_pg_data);
+
+static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+                                         pte32_t * shadow_pt,  pte32_t * guest_pt,  struct shdw_pg_data * pt_pg_data);
+
+
+
+
+static inline int handle_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+    pde32_t * guest_pd = NULL;
+    pde32_t * shadow_pd = CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
+    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
+    pt_access_status_t guest_pde_access;
+    pt_access_status_t shadow_pde_access;
+    pde32_t * guest_pde = NULL;
+    pde32_t * shadow_pde = (pde32_t *)&(shadow_pd[PDE32_INDEX(fault_addr)]);
+
+    PrintDebug("Shadow cache page fault handler: %p\n", (void *)fault_addr );
+    PrintDebug("Handling PDE32 Fault\n");
+
+    if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) {
+       PrintError("Invalid Guest PDE Address: 0x%p\n",  (void *)guest_cr3);
+       return -1;
+    } 
+
+    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(fault_addr)]);
+
+ // Check the guest page permissions
+    guest_pde_access = v3_can_access_pde32(guest_pd, fault_addr, error_code);
+
+    // Check the shadow page permissions
+    shadow_pde_access = v3_can_access_pde32(shadow_pd, fault_addr, error_code);
+  
+    /* Was the page fault caused by the Guest's page tables? */
+    if (v3_is_guest_pf(guest_pde_access, shadow_pde_access) == 1) {
+       PrintDebug("Injecting PDE pf to guest: (guest access error=%d) (shdw access error=%d)  (pf error code=%d)\n", 
+                  *(uint_t *)&guest_pde_access, *(uint_t *)&shadow_pde_access, *(uint_t *)&error_code);
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       return 0;
+    }
+
+
+
+    if (shadow_pde_access == PT_ACCESS_USER_ERROR) {
+       // 
+       // PDE Entry marked non user
+       //
+       PrintDebug("Shadow Paging User access error (shadow_pde_access=0x%x, guest_pde_access=0x%x)\n", 
+                  shadow_pde_access, guest_pde_access);
+       
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       return 0;
+    } else if ((shadow_pde_access == PT_ACCESS_WRITE_ERROR) && 
+              (guest_pde->large_page == 1)) {
+       
+       ((pde32_4MB_t *)guest_pde)->dirty = 1;
+       shadow_pde->writable = guest_pde->writable;
+       return 0;
+    } else if ((shadow_pde_access != PT_ACCESS_NOT_PRESENT) &&
+              (shadow_pde_access != PT_ACCESS_OK)) {
+       // inject page fault in guest
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       PrintDebug("Unknown Error occurred (shadow_pde_access=%d)\n", shadow_pde_access);
+       PrintDebug("Manual Says to inject page fault into guest\n");
+       return 0;
+    }
+
+  
+    pte32_t * shadow_pt = NULL;
+    pte32_t * guest_pt = NULL;
+
+
+    /*  Set up cache state */
+    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_pde->pt_base_addr);
+
+       
+    struct shdw_pg_data * shdw_page = NULL;
+    page_type_t pt_type = PAGE_PT32;
+
+    if (guest_pde->large_page == 1) {
+       // Handle Large pages, for this we use the PAGE_4MB type...
+       pt_type = PAGE_4MB;
+    }
+
+    shdw_page = find_shdw_pt(core->vm_info, gpa, pt_type);
+       
+    if (shdw_page == NULL) {
+       shdw_page = create_shdw_pt(core->vm_info, gpa, pt_type);
+    }
+    
+    // update current reverse map entries...
+    // We are now using this page in a PT, so:
+    //     any existing writable mappings must be updated
+    update_rmap_entries(core->vm_info, gpa);
+    
+    struct shdw_pg_data * parent_page = find_shdw_pt(core->vm_info, guest_cr3, PAGE_PD32);
+    
+    if (parent_page != NULL) {
+       // add back pointer to PDE, if it exists
+       link_shdw_pg(shdw_page, parent_page, PAGE_ADDR_4KB(fault_addr));
+    }
+
+
+    // Get the next shadow page  level, allocate if not present
+
+    if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {
+
+       /* Currently we do not support large pages
+          This requires us to scan the large page for Page table pages, and split the entries if they exist. 
+          Its easier to just ignore this for now...
+          if ((core->use_large_pages == 1) && (guest_pde->large_page == 1)) {
+          // Check underlying physical memory map to see if a large page is viable
+          addr_t gpa_4MB = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
+          uint32_t page_size = v3_get_max_page_size(core, gpa_4MB, PROTECTED);
+          
+          if (page_size == PAGE_SIZE_4MB) {
+          PrintDebug("using large page for fault_addr %p (gpa=%p)\n", (void *)fault_addr, (void *)gpa_4MB); 
+          if (handle_4MB_shadow_pagefault_pde_32(core, fault_addr, error_code, shadow_pde_access,
+          (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == -1) {
+          PrintError("Error handling large pagefault with large page\n");
+          return -1;
+          }
+          
+          return 0;
+          }
+          }
+       */
+
+
+
+       
+       shadow_pt = (pte32_t *)(shdw_page->hva);
+
+
+
+       shadow_pde->present = 1;
+       shadow_pde->user_page = guest_pde->user_page;
+
+
+       if (guest_pde->large_page == 0) {
+           shadow_pde->writable = guest_pde->writable;
+       } else {
+           // This large page flag is temporary until we can get a working cache....
+           ((pde32_4MB_t *)guest_pde)->vmm_info = V3_LARGE_PG;
+
+           if (error_code.write) {
+               shadow_pde->writable = guest_pde->writable;
+               ((pde32_4MB_t *)guest_pde)->dirty = 1;
+           } else {
+               shadow_pde->writable = 0;
+               ((pde32_4MB_t *)guest_pde)->dirty = 0;
+           }
+       }
+      
+       // VMM Specific options
+       shadow_pde->write_through = guest_pde->write_through;
+       shadow_pde->cache_disable = guest_pde->cache_disable;
+       shadow_pde->global_page = guest_pde->global_page;
+       //
+      
+       guest_pde->accessed = 1;
+      
+       shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->hpa);
+    } else {
+       shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr));
+    }
+
+    
+    if (guest_pde->large_page == 0) {
+       if (v3_gpa_to_hva(core, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t*)&guest_pt) == -1) {
+           // Machine check the guest
+           PrintDebug("Invalid Guest PTE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr));
+           v3_raise_exception(core, MC_EXCEPTION);
+           return 0;
+       }
+
+       if (handle_pte_shadow_pagefault_32(core, fault_addr, error_code, shadow_pt, guest_pt, shdw_page)  == -1) {
+           PrintError("Error handling Page fault caused by PTE\n");
+           return -1;
+       }
+    } else {
+       if (handle_4MB_shadow_pagefault_pte_32(core, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde, shdw_page) == -1) {
+           PrintError("Error handling large pagefault\n");
+           return -1;
+       }       
+    }
+
+    return 0;
+}
+
+
+
+
+static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+                                         pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data) {
+
+    pt_access_status_t guest_pte_access;
+    pt_access_status_t shadow_pte_access;
+    pte32_t * guest_pte = (pte32_t *)&(guest_pt[PTE32_INDEX(fault_addr)]);;
+    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
+    addr_t guest_pa = BASE_TO_PAGE_ADDR((addr_t)(guest_pte->page_base_addr)) +  PAGE_OFFSET(fault_addr);
+
+    struct v3_mem_region * shdw_reg =  v3_get_mem_region(core->vm_info, core->cpu_id, guest_pa);
+
+    if (shdw_reg == NULL) {
+       // Inject a machine check in the guest
+       PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_pa);
+       v3_raise_exception(core, MC_EXCEPTION);
+       return 0;
+    }
+
+    // Check the guest page permissions
+    guest_pte_access = v3_can_access_pte32(guest_pt, fault_addr, error_code);
+
+    // Check the shadow page permissions
+    shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
+  
+  
+    /* Was the page fault caused by the Guest's page tables? */
+    if (v3_is_guest_pf(guest_pte_access, shadow_pte_access) == 1) {
+
+       PrintDebug("Access error injecting pf to guest (guest access error=%d) (pf error code=%d)\n", 
+                  guest_pte_access, *(uint_t*)&error_code);
+       
+
+       //   inject:
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }       
+
+       return 0; 
+    }
+
+  
+  
+    if (shadow_pte_access == PT_ACCESS_OK) {
+       // Inconsistent state...
+       // Guest Re-Entry will flush page tables and everything should now work
+       PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
+       return 0;
+    }
+
+
+    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
+       // Page Table Entry Not Present
+       PrintDebug("guest_pa =%p\n", (void *)guest_pa);
+
+       if ((shdw_reg->flags.alloced == 1) && (shdw_reg->flags.read == 1)) {
+           addr_t shadow_pa = 0;
+
+           if (v3_gpa_to_hpa(core, guest_pa, &shadow_pa) == -1) {
+               PrintError("could not translate page fault address (%p)\n", (void *)guest_pa);
+               return -1;
+           }
+
+           shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);
+
+           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));
+      
+           shadow_pte->present = guest_pte->present;
+           shadow_pte->user_page = guest_pte->user_page;
+      
+           //set according to VMM policy
+           shadow_pte->write_through = guest_pte->write_through;
+           shadow_pte->cache_disable = guest_pte->cache_disable;
+           shadow_pte->global_page = guest_pte->global_page;
+           //
+      
+           guest_pte->accessed = 1;
+      
+           if (guest_pte->dirty == 1) {
+               shadow_pte->writable = guest_pte->writable;
+           } else if ((guest_pte->dirty == 0) && (error_code.write == 1)) {
+               shadow_pte->writable = guest_pte->writable;
+               guest_pte->dirty = 1;
+           } else if ((guest_pte->dirty == 0) && (error_code.write == 0)) {
+               shadow_pte->writable = 0;
+           }
+
+
+           if (shdw_reg->flags.write == 0) {
+               shadow_pte->writable = 0;
+           }
+
+
+           // Add this PTE to the reverse map...
+           // This allows us to update this PTE entry if it gets turned into a PT page
+           add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_pa), PAGE_ADDR_4KB(fault_addr));
+
+           // Check for cache entries and mark page read-only, plus tag
+           {
+               struct shdw_pg_data * pt_page = NULL;
+               addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);
+
+               pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);
+               
+               if (pt_page == NULL) {
+                   pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
+               }
+
+               if (pt_page != NULL) {
+                   // This is a page table page... 
+                   shadow_pte->writable = 0;
+                   shadow_pte->vmm_info = V3_CACHED_PG;
+               }
+           }
+
+       } else {
+           // Page fault on unhandled memory region
+           
+           if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page fault handler returned error for address: %p\n",  (void *)fault_addr);
+               return -1;
+           }
+       }
+    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
+       guest_pte->dirty = 1;
+
+       // check for cache tag and handle invalidations if it exists.
+       if (shadow_pte->vmm_info == V3_CACHED_PG) {
+           addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);
+
+           PrintError("Evicting on a small page\n");
+
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
+               PrintError("Error Evicting PAGE_PD32 cache entry\n");
+               return -1;
+           }
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
+               PrintError("Error Evicting PAGE_PT32 cache entry\n");
+               return -1;
+           }
+
+           shadow_pte->vmm_info &= ~V3_CACHED_PG;
+       }
+
+
+       if (shdw_reg->flags.write == 1) {
+           PrintDebug("Shadow PTE Write Error\n");
+           shadow_pte->writable = guest_pte->writable;
+       } else {
+           if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page fault handler returned error for address: %p\n",  (void *)fault_addr);
+               return -1;
+           }
+       }
+
+
+       return 0;
+
+    } else {
+       // Inject page fault into the guest     
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+
+       PrintError("PTE Page fault fell through... Not sure if this should ever happen\n");
+       PrintError("Manual Says to inject page fault into guest\n");
+       return -1;
+    }
+
+    return 0;
+}
+
+// Handle a 4MB page fault with small pages in the PTE
+static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, 
+                                             addr_t fault_addr, pf_error_t error_code, 
+                                             pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, 
+                                             struct shdw_pg_data * pt_pg_data) 
+{
+    pt_access_status_t shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
+    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
+    addr_t guest_fault_pa = BASE_TO_PAGE_ADDR_4MB(large_guest_pde->page_base_addr) + PAGE_OFFSET_4MB(fault_addr);  
+
+
+    PrintDebug("Handling 4MB fault (guest_fault_pa=%p) (error_code=%x)\n", (void *)guest_fault_pa, *(uint_t*)&error_code);
+    PrintDebug("ShadowPT=%p, LargeGuestPDE=%p\n", shadow_pt, large_guest_pde);
+
+    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_fault_pa);
+
+    if (shdw_reg == NULL) {
+       // Inject a machine check in the guest
+       PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa);
+       v3_raise_exception(core, MC_EXCEPTION);
+       return -1;
+    }
+
+    if (shadow_pte_access == PT_ACCESS_OK) {
+       // Inconsistent state...
+       // Guest Re-Entry will flush tables and everything should now workd
+       PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
+       return 0;
+    }
+
+  
+    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
+       // Get the guest physical address of the fault
+
+       if ((shdw_reg->flags.alloced == 1) && 
+           (shdw_reg->flags.read  == 1)) {
+           addr_t shadow_pa = 0;
+
+
+           if (v3_gpa_to_hpa(core, guest_fault_pa, &shadow_pa) == -1) {
+               PrintError("could not translate page fault address (%p)\n", (void *)guest_fault_pa);
+               return -1;
+           }
+
+           shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);
+
+           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));
+
+           shadow_pte->present = 1;
+
+           /* We are assuming that the PDE entry has precedence
+            * so the Shadow PDE will mirror the guest PDE settings, 
+            * and we don't have to worry about them here
+            * Allow everything
+            */
+           shadow_pte->user_page = 1;
+
+           //set according to VMM policy
+           shadow_pte->write_through = large_guest_pde->write_through;
+           shadow_pte->cache_disable = large_guest_pde->cache_disable;
+           shadow_pte->global_page = large_guest_pde->global_page;
+           //
+      
+
+           if (shdw_reg->flags.write == 0) {
+               shadow_pte->writable = 0;
+           } else {
+               shadow_pte->writable = 1;
+           }
+
+
+           // Add this PTE to the reverse map...
+           // This allows us to update this PTE entry if it gets turned into a PT page sometime in the future
+           add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_fault_pa), PAGE_ADDR_4KB(fault_addr));
+
+           // Check for cache entries and mark page read-only, plus tag
+           {
+               struct shdw_pg_data * pt_page = NULL;
+               addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);
+
+               pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);
+               
+               if (pt_page == NULL) {
+                   pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
+               }
+
+               if (pt_page != NULL) {
+                   // This is a page table page... 
+                   shadow_pte->writable = 0;
+                   shadow_pte->vmm_info = V3_CACHED_PG;
+               }
+
+           }
+
+       } else {
+           if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
+               return -1;
+           }
+       }
+    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
+
+       // check for cache tag and handle invalidations if it exists.
+       if (shadow_pte->vmm_info == V3_CACHED_PG) {
+           addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);
+           PrintError("Evicting on a large page\n");
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
+               PrintError("Error Evicting PAGE_PD32 cache entry\n");
+               return -1;
+           }
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
+               PrintError("Error Evicting PAGE_PT32 cache entry\n");
+               return -1;
+           }
+
+           shadow_pte->vmm_info &= ~V3_CACHED_PG;
+       }
+
+
+       if (shdw_reg->flags.write == 0) {
+           if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
+               return -1;
+           }
+       } else {
+           // set writable after cache eviction, unless overruled by region setting
+           shadow_pte->writable = 1;
+       }
+
+    } else {
+       PrintError("Error in large page fault handler...\n");
+       PrintError("This case should have been handled at the top level handler\n");
+       return -1;
+    }
+
+    PrintDebug("Returning from large page->small page fault handler\n");
+    return 0;
+}
+
+
+/* If we start to optimize we should look up the guest pages in the cache... */
+static inline int handle_shadow_invlpg_32(struct guest_info * core, addr_t vaddr) {
+    pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
+    pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(vaddr)];
+
+    addr_t guest_cr3 =  CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
+    pde32_t * guest_pd = NULL;
+    pde32_t * guest_pde;
+
+    if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) {
+       PrintError("Invalid Guest PDE Address: 0x%p\n",  (void *)guest_cr3);
+       return -1;
+    }
+  
+    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(vaddr)]);
+  
+
+    if (guest_pde->large_page == 1) {
+       shadow_pde->present = 0;
+       PrintError("\tInvalidating Large Page (gpa=%p)\n", (void *)BASE_TO_PAGE_ADDR_4MB(guest_pde->pt_base_addr));
+    } else if (shadow_pde->present == 1) {
+       pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR_4KB(shadow_pde->pt_base_addr);
+       pte32_t * shadow_pte = (pte32_t *) V3_VAddr( (void*) &shadow_pt[PTE32_INDEX(vaddr)] );
+
+       
+
+           PrintError("\tInvalidating small page\n");
+
+
+       shadow_pte->present = 0;
+    } else {
+
+       PrintError("What the fuck?\n");
+    }
+    return 0;
+}
+
+
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c b/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c
index af23fed..e8c7209 100644
--- a/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c
+++ b/palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c
@@ -205,7 +205,7 @@ static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t
     shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);
 
     if (shdw_ptr_list == NULL) {
-       shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head *));
+       shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
        swap_state->list_size++;
 #endif
diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c
index 6ccf509..738d2ee 100644
--- a/palacios/src/palacios/vmm_config.c
+++ b/palacios/src/palacios/vmm_config.c
@@ -327,11 +327,12 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
        return -1;
     }
 
-    if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
-       info->use_large_pages = 1;
-       PrintDebug("Use of large pages in memory virtualization enabled.\n");
+    if (v3_cfg_val(pg_tree, "large_pages") != NULL) {
+       if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
+           info->use_large_pages = 1;
+           PrintDebug("Use of large pages in memory virtualization enabled.\n");
+       }
     }
-
     return 0;
 }