+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
#include <palacios/vmm_shadow_paging.h>
+
#include <palacios/vmm.h>
#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_decoder.h>
+#include <palacios/vmm_ctrl_regs.h>
+
+#include <palacios/vmm_hashtable.h>
+
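+// Unless shadow paging debugging is enabled, PrintDebug compiles down to a no-op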
+#ifndef DEBUG_SHADOW_PAGING
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+/***
+ *** There be dragons
+ ***/
+
+
+struct guest_table {
+ addr_t cr3;
+ struct list_head link;
+};
+
+
+struct backptr {
+ addr_t ptr;
+ struct list_head link;
+};
+
+
+struct shadow_page_data {
+ addr_t ptr;
+ addr_t guest_addr;
+
+ struct list_head backptrs;
+ struct list_head guest_tables;
+};
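+
+// Note: these three structures look like bookkeeping for an eventual shadow
+// page table cache (shadow pages, their guest sources, and back references);
+// nothing in this file references them yet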
+
+
+
+
+//DEFINE_HASHTABLE_INSERT(add_cr3_to_cache, addr_t, struct hashtable *);
+//DEFINE_HASHTABLE_SEARCH(find_cr3_in_cache, addr_t, struct hashtable *);
+//DEFINE_HASHTABLE_REMOVE(del_cr3_from_cache, addr_t, struct hashtable *, 0);
+
+
+DEFINE_HASHTABLE_INSERT(add_pte_map, addr_t, addr_t);
+DEFINE_HASHTABLE_SEARCH(find_pte_map, addr_t, addr_t);
+//DEFINE_HASHTABLE_REMOVE(del_pte_map, addr_t, addr_t, 0);
+
+
+
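+// Hash and equality functions for the pte map (the cached_ptes table in the
+// shadow page state, presumably keyed on guest page table addresses)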
+static uint_t pte_hash_fn(addr_t key) {
+ return hash_long(key, 32);
+}
+
+static int pte_equals(addr_t key1, addr_t key2) {
+ return (key1 == key2);
+}
-extern struct vmm_os_hooks * os_hooks;
+static addr_t create_new_shadow_pt(void);
+static void inject_guest_pf(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
+static int is_guest_pf(pt_access_status_t guest_access, pt_access_status_t shadow_access);
-int init_shadow_page_state(struct shadow_page_state * state) {
- state->guest_mode = PDE32;
- state->shadow_mode = PDE32;
+#include "vmm_shadow_paging_32.h"
+#include "vmm_shadow_paging_32pae.h"
+#include "vmm_shadow_paging_64.h"
+
+
+
+int v3_init_shadow_page_state(struct guest_info * info) {
+ struct shadow_page_state * state = &(info->shdw_pg_state);
- state->guest_cr3.r_reg = 0;
- state->shadow_cr3.r_reg = 0;
+ state->guest_cr3 = 0;
+ state->guest_cr0 = 0;
+
+ state->cached_ptes = NULL;
return 0;
}
-
-int wholesale_update_shadow_page_state(struct guest_info * guest_info) {
- unsigned i, j;
- pde32_t * guest_pde;
- pde32_t * shadow_pde;
- struct shadow_page_state * state = &(guest_info->shdw_pg_state);
- // For now, we'll only work with PDE32
- if (state->guest_mode != PDE32) {
+
+
+
+// Reads the guest CR3 register,
+// creates a new set of shadow page tables,
+// and updates the shadow CR3 register to point to the new tables
+int v3_activate_shadow_pt(struct guest_info * info) {
+ switch (info->cpu_mode) {
+
+ case PROTECTED:
+ return activate_shadow_pt_32(info);
+ case PROTECTED_PAE:
+ return activate_shadow_pt_32pae(info);
+ case LONG:
+ case LONG_32_COMPAT:
+ case LONG_16_COMPAT:
+ return activate_shadow_pt_64(info);
+ default:
+ PrintError("Invalid CPU mode: %d\n", info->cpu_mode);
return -1;
}
- shadow_pde = (pde32_t *)(CR3_TO_PDE32(state->shadow_cr3.e_reg.low));
+ return 0;
+}
+
- if (host_pa_to_host_va(CR3_TO_PDE32(state->guest_cr3.e_reg.low), (addr_t*)&guest_pde) != 0) {
+int v3_activate_passthrough_pt(struct guest_info * info) {
+    // For now... But we need to change this....
+    // As soon as shadow paging becomes active the passthrough tables are invalid,
+    // so calling this at that point will cause chaos
+
+ info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
+ //PrintError("Activate Passthrough Page tables not implemented\n");
+ return 0;
+}
+
+
+
+int v3_handle_shadow_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+
+ if (info->mem_mode == PHYSICAL_MEM) {
+ // If paging is not turned on we need to handle the special cases
+
+#ifdef DEBUG_SHADOW_PAGING
+ PrintHostPageTree(info->cpu_mode, fault_addr, info->ctrl_regs.cr3);
+ PrintGuestPageTree(info, fault_addr, info->shdw_pg_state.guest_cr3);
+#endif
+
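+	// With guest paging off, a guest virtual address equals its guest
+	// physical address, so fault_addr is passed as both arguments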
+ return handle_special_page_fault(info, fault_addr, fault_addr, error_code);
+ } else if (info->mem_mode == VIRTUAL_MEM) {
+
+	switch (info->cpu_mode) {
+	    case PROTECTED:
+		return handle_shadow_pagefault_32(info, fault_addr, error_code);
+	    case PROTECTED_PAE:
+		return handle_shadow_pagefault_32pae(info, fault_addr, error_code);
+	    case LONG:
+		return handle_shadow_pagefault_64(info, fault_addr, error_code);
+	    default:
+		PrintError("Unhandled CPU Mode\n");
+		return -1;
+	}
+ } else {
+ PrintError("Invalid Memory mode\n");
return -1;
}
+}
+
+
+
+static addr_t create_new_shadow_pt(void) {
+ void * host_pde = 0;
+
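+    // V3_AllocPages hands back a host physical address; V3_VAddr converts it
+    // into a host virtual address we can safely memset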
+ host_pde = V3_VAddr(V3_AllocPages(1));
+ memset(host_pde, 0, PAGE_SIZE);
- // Delete the current page table
- delete_page_tables_pde32(shadow_pde);
+ return (addr_t)host_pde;
+}
- shadow_pde = os_hooks->allocate_pages(1);
- state->shadow_cr3.e_reg.low = (addr_t)shadow_pde;
+static void inject_guest_pf(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+ if (info->enable_profiler) {
+ info->profiler.guest_pf_cnt++;
+ }
- state->shadow_mode = PDE32;
+ info->ctrl_regs.cr2 = fault_addr;
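+    // Reinterpret the error code struct as its raw 32 bit encoding for injection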
+ v3_raise_exception_with_error(info, PF_EXCEPTION, *(uint_t *)&error_code);
+}
- for (i = 0; i < MAX_PDE32_ENTRIES; i++) {
- shadow_pde[i] = guest_pde[i];
- // The shadow can be identical to the guest if it's not present
- if (!shadow_pde[i].present) {
- continue;
+static int is_guest_pf(pt_access_status_t guest_access, pt_access_status_t shadow_access) {
+  /* There can be multiple reasons for a page fault:
+     If there is a permissions failure for a page present in the guest _BUT_
+     the reason for the fault was that the page is not present in the shadow,
+     _THEN_ we have to map the shadow page in and reexecute. This will generate
+     a permissions fault, which is _THEN_ valid to send to the guest,
+     _UNLESS_ both the guest and the shadow have marked the page as not
+     present, in which case the guest gets the fault right away.
+   */
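+
+  /* Summary of the cases below:
+       guest access OK                        -> 0 (shadow-only fault; fix the shadow)
+       guest and shadow both not present      -> 1 (guest must populate its tables first)
+       guest perm error, shadow present       -> 1 (forward the permission fault)
+       guest perm error, shadow not present   -> 0 (map the shadow page in; the fault recurs)
+       guest not present, shadow present      -> 0 (stale shadow entry; fix the shadow)
+   */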
+ if (guest_access != PT_ACCESS_OK) {
+ // Guest Access Error
+
+ if ((shadow_access != PT_ACCESS_NOT_PRESENT) &&
+ (guest_access != PT_ACCESS_NOT_PRESENT)) {
+	    // i.e. a genuine guest permission error; forward it
+ return 1;
}
- if (shadow_pde[i].large_pages) {
- // large page - just map it through shadow map to generate its physical location
- addr_t guest_addr = PAGE_ADDR(shadow_pde[i].pt_base_addr);
- addr_t host_addr;
- shadow_region_t * ent;
+ if ((shadow_access == PT_ACCESS_NOT_PRESENT) &&
+ (guest_access == PT_ACCESS_NOT_PRESENT)) {
+ // Page tables completely blank, handle guest first
+ return 1;
+ }
+
+	// Otherwise, fix up the shadow mapping first; any still-valid guest fault will recur
+ }
+
+ return 0;
+}
- ent = get_shadow_region_by_addr(&(guest_info->mem_map), guest_addr);
-
- if (!ent) {
- // FIXME Panic here - guest is trying to map to physical memory
- // it does not own in any way!
- return -1;
- }
- // FIXME Bounds check here to see if it's trying to trick us
-
- switch (ent->host_type) {
- case HOST_REGION_PHYSICAL_MEMORY:
- // points into currently allocated physical memory, so we just
- // set up the shadow to point to the mapped location
- if (guest_pa_to_host_pa(guest_info, guest_addr, &host_addr)) {
- // Panic here
- return -1;
- }
-
- shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(host_addr);
- // FIXME set vmm_info bits here
- break;
- case HOST_REGION_UNALLOCATED:
- // points to physical memory that is *allowed* but that we
- // have not yet allocated. We mark as not present and set a
- // bit to remind us to allocate it later
- shadow_pde[i].present = 0;
- // FIXME Set vminfo bits here so that we know that we will be
- // allocating it later
- break;
- case HOST_REGION_NOTHING:
- // points to physical memory that is NOT ALLOWED.
- // We will mark it as not present and set a bit to remind
- // us that it's bad later and insert a GPF then
- shadow_pde[i].present = 0;
- break;
- case HOST_REGION_MEMORY_MAPPED_DEVICE:
- case HOST_REGION_REMOTE:
- case HOST_REGION_SWAPPED:
- default:
- // Panic. Currently unhandled
- return -1;
- break;
- }
- } else {
- pte32_t * guest_pte;
- pte32_t * shadow_pte;
- addr_t guest_addr;
- addr_t guest_pte_host_addr;
- shadow_region_t * ent;
-
- // small page - set PDE and follow down to the child table
- shadow_pde[i] = guest_pde[i];
-
- guest_addr = PAGE_ADDR(guest_pde[i].pt_base_addr);
-
- // Allocate a new second level page table for the shadow
- shadow_pte = os_hooks->allocate_pages(1);
-
- // make our first level page table in the shadow point to it
- shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(shadow_pte);
-
- ent = get_shadow_region_by_addr(&(guest_info->mem_map), guest_addr);
-
- /* JRL: This is bad.... */
- // For now the guest Page Table must always be mapped to host physical memory
- /* If we swap out a page table or if it isn't present for some reason, this turns real ugly */
- if ((!ent) || (ent->host_type != HOST_REGION_PHYSICAL_MEMORY)) {
- // FIXME Panic here - guest is trying to map to physical memory
- // it does not own in any way!
- return -1;
- }
- // Address of the relevant second level page table in the guest
- if (guest_pa_to_host_pa(guest_info, guest_addr, &guest_pte_host_addr)) {
- // Panic here
- return -1;
- }
- // host_addr now contains the host physical address for the guest's 2nd level page table
- // Now we transform it to relevant virtual address
- guest_pte = os_hooks->paddr_to_vaddr((void *)guest_pte_host_addr);
- // Now we walk through the second level guest page table
- // and clone it into the shadow
- for (j = 0; j < MAX_PTE32_ENTRIES; j++) {
- shadow_pte[j] = guest_pte[j];
- addr_t guest_addr = PAGE_ADDR(shadow_pte[j].page_base_addr);
-
- shadow_region_t * ent;
- ent = get_shadow_region_by_addr(&(guest_info->mem_map), guest_addr);
+
+
+/* Note: this currently does not work with segmentation!!! */
+int v3_handle_shadow_invlpg(struct guest_info * info) {
+ if (info->mem_mode != VIRTUAL_MEM) {
+	// Paging must be turned on...
+	// This should probably be handled by injecting some sort of fault into the guest
+	PrintError("ERROR: INVLPG called in non-paged mode\n");
+ return -1;
+ }
+
+
+ if (info->cpu_mode != PROTECTED) {
+ PrintError("Unsupported CPU mode (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
+ return -1;
+ }
+
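+    // 15 bytes is the maximum possible length of an x86 instruction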
+ uchar_t instr[15];
+ int index = 0;
+
+ int ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ if (ret != 15) {
+ PrintError("Could not read instruction 0x%p (ret=%d)\n", (void *)(addr_t)(info->rip), ret);
+ return -1;
+ }
+
+
+ /* Can INVLPG work with Segments?? */
+ while (is_prefix_byte(instr[index])) {
+ index++;
+ }
+
+
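+    // INVLPG encodes as 0F 01 /7; only the 0F 01 escape/opcode bytes are
+    // checked here, the /7 reg field of the ModRM byte is not verified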
+    if ((instr[index + 0] != (uchar_t) 0x0f) ||
+	(instr[index + 1] != (uchar_t) 0x01)) {
+	PrintError("Invalid Instruction Opcode\n");
+ PrintTraceMemDump(instr, 15);
+ return -1;
+ }
+
+ addr_t first_operand;
+ addr_t second_operand;
+ addr_t guest_cr3 = CR3_TO_PDE32_PA(info->shdw_pg_state.guest_cr3);
+
+ pde32_t * guest_pd = NULL;
+
+ if (guest_pa_to_host_va(info, guest_cr3, (addr_t*)&guest_pd) == -1) {
+ PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)guest_cr3);
+ return -1;
+ }
+
+ index += 2;
+
+ v3_operand_type_t addr_type = decode_operands32(&(info->vm_regs), instr + index, &index, &first_operand, &second_operand, REG32);
+
+ if (addr_type != MEM_OPERAND) {
+ PrintError("Invalid Operand type\n");
+ return -1;
+ }
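+
+    // first_operand now holds the guest linear address to be invalidated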
+
+ pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(info->ctrl_regs.cr3);
+ pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(first_operand)];
+ pde32_t * guest_pde;
+
+ //PrintDebug("PDE Index=%d\n", PDE32_INDEX(first_operand));
+ //PrintDebug("FirstOperand = %x\n", first_operand);
+
+ PrintDebug("Invalidating page for %p\n", (void *)first_operand);
+
+ guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(first_operand)]);
+
+ if (guest_pde->large_page == 1) {
+ shadow_pde->present = 0;
+ PrintDebug("Invalidating Large Page\n");
+    } else if (shadow_pde->present == 1) {
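+	// BASE_TO_PAGE_ADDR yields a host physical address, so the PTE pointer
+	// must go through V3_VAddr before it can be dereferenced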
+ pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr);
+ pte32_t * shadow_pte = (pte32_t *) V3_VAddr( (void*) &shadow_pt[PTE32_INDEX(first_operand)] );
- if (!ent) {
- // FIXME Panic here - guest is trying to map to physical memory
- // it does not own in any way!
- return -1;
- }
-
- switch (ent->host_type) {
- case HOST_REGION_PHYSICAL_MEMORY:
- {
- addr_t host_addr;
-
- // points into currently allocated physical memory, so we just
- // set up the shadow to point to the mapped location
- if (guest_pa_to_host_pa(guest_info, guest_addr, &host_addr)) {
- // Panic here
- return -1;
- }
-
- shadow_pte[j].page_base_addr = PAGE_ALIGNED_ADDR(host_addr);
- // FIXME set vmm_info bits here
- break;
- }
- case HOST_REGION_UNALLOCATED:
- // points to physical memory that is *allowed* but that we
- // have not yet allocated. We mark as not present and set a
- // bit to remind us to allocate it later
- shadow_pte[j].present = 0;
- // FIXME Set vminfo bits here so that we know that we will be
- // allocating it later
- break;
- case HOST_REGION_NOTHING:
- // points to physical memory that is NOT ALLOWED.
- // We will mark it as not present and set a bit to remind
- // us that it's bad later and insert a GPF then
- shadow_pte[j].present = 0;
- break;
- case HOST_REGION_MEMORY_MAPPED_DEVICE:
- case HOST_REGION_REMOTE:
- case HOST_REGION_SWAPPED:
- default:
- // Panic. Currently unhandled
- return -1;
- break;
- }
- }
+#ifdef DEBUG_SHADOW_PAGING
+ PrintDebug("Setting not present\n");
+ PrintPTEntry(PAGE_PT32, first_operand, shadow_pte);
+#endif
+
+ shadow_pte->present = 0;
}
- }
+
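+    // Advance the guest RIP past the prefixes, opcode, and decoded operands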
+ info->rip += index;
+
return 0;
}
-
+