void * sched_priv_data;
v3_paging_mode_t shdw_pg_mode;
+ // arch-independent state of shadow pager
struct v3_shdw_pg_state shdw_pg_state;
- //struct v3_nested_pg_state nested_pg_state;
+    // arch-independent state of the passthrough pager
addr_t direct_map_pt;
+ // arch-independent state of the nested pager (currently none)
+ // struct v3_nested_pg_state nested_pg_state;
union {
struct v3_fp_state fp_state;
+ // the arch-dependent state (SVM or VMX)
void * vmm_data;
uint64_t yield_start_cycle;
struct v3_mem_hooks mem_hooks;
+    // arch-independent state of shadow pager
struct v3_shdw_impl_state shdw_impl;
- //struct v3_nested_impl_state nested_impl;
+    // arch-independent state of passthrough pager
+ struct v3_passthrough_impl_state passthrough_impl;
+ // arch-independent state of the nested pager
+ struct v3_nested_impl_state nested_impl;
void * sched_priv_data;
struct v3_io_map io_map;
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
+#include <palacios/vmm_list.h>
+
+
+/**********************************
+ PASSTHROUGH PAGING - CORE FUNC
+ **********************************/
+
+
+struct v3_passthrough_impl_state {
+ // currently there is only a single implementation
+ // that internally includes SVM and VMX support
+ // The externally visible state is just the callbacks
+ struct list_head event_callback_list;
+};
+
+
+int v3_init_passthrough_paging(struct v3_vm_info *vm);
+int v3_init_passthrough_paging_core(struct guest_info *core);
+int v3_deinit_passthrough_paging(struct v3_vm_info *vm);
+int v3_deinit_passthrough_paging_core(struct guest_info *core);
int v3_init_passthrough_pts(struct guest_info * guest_info);
int v3_free_passthrough_pts(struct guest_info * core);
int v3_reset_passthrough_pts(struct guest_info * guest_info);
-int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
-int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
+// actual_start/end may be null if you don't want this info
+// If non-null, these return the actual affected GPA range
+int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end);
int v3_activate_passthrough_pt(struct guest_info * info);
-int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr);
+int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end);
+
// The range invalidated is minimally [start, end]
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end);
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end);
+
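/* Usage sketch (hypothetical helper; the function name and "spilled" flag are
   assumptions for illustration only): invalidate the inclusive GPA range
   [gpa_start, gpa_end] and report whether the implementation had to invalidate
   more than was asked for, which can happen when a large-page mapping is
   dropped.  Callers that do not care about the actual range pass NULL,NULL. */
static inline int example_invalidate_exact(struct guest_info *core,
                                           addr_t gpa_start, addr_t gpa_end,
                                           int *spilled)
{
    addr_t actual_start = 0;
    addr_t actual_end   = 0;

    if (v3_invalidate_passthrough_addr_range(core, gpa_start, gpa_end,
                                             &actual_start, &actual_end) == -1) {
        return -1;
    }

    // the out-parameters bound the GPA range that was actually invalidated
    *spilled = (actual_start < gpa_start) || (actual_end > gpa_end);

    return 0;
}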
+/**********************************
+ PASSTHROUGH PAGING - EVENTS
+ **********************************/
+
+struct v3_passthrough_pg_event {
+ enum {PASSTHROUGH_PAGEFAULT,PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_ACTIVATE} event_type;
+ enum {PASSTHROUGH_PREIMPL, PASSTHROUGH_POSTIMPL} event_order;
+ addr_t gpa; // for pf
+ pf_error_t error_code; // for pf
+ addr_t gpa_start; // for invalidation of range or page fault
+ addr_t gpa_end; // for invalidation of range or page fault (range is [start,end] )
+ // PREIMPL: start/end is the requested range
+ // POSTIMPL: start/end is the actual range invalidated
+};
+
+
+
+int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+ struct v3_passthrough_pg_event *,
+ void *priv_data),
+ void *priv_data);
+
+int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+                                                     struct v3_passthrough_pg_event *,
+ void *priv_data),
+ void *priv_data);
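
/* Usage sketch (hypothetical callback and counter; names are assumptions for
   illustration only): a callback registered via the call above is invoked
   twice per operation - once with PASSTHROUGH_PREIMPL before the
   implementation runs, and once with PASSTHROUGH_POSTIMPL afterwards, when
   gpa_start/gpa_end hold the GPA range actually affected. */
static inline int example_passthrough_pg_callback(struct guest_info *core,
                                                  struct v3_passthrough_pg_event *event,
                                                  void *priv_data)
{
    if (event->event_type == PASSTHROUGH_PAGEFAULT &&
        event->event_order == PASSTHROUGH_POSTIMPL) {
        // count completed passthrough page faults; priv_data is whatever
        // pointer was supplied at registration time (here: a uint64_t counter)
        (*(uint64_t *)priv_data)++;
    }
    return 0;
}
/* Registered once per VM, e.g. during setup:
     v3_register_passthrough_paging_event_callback(vm, example_passthrough_pg_callback, &counter);
*/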
+
+
+
+/*****************************
+ NESTED PAGING - CORE FUNC
+ *****************************/
+
+
+struct v3_nested_impl_state {
+ // currently there is only a single implementation
+ // that internally includes SVM and VMX support
+ // The externally visible state is just the callbacks
+ struct list_head event_callback_list;
+};
+
+int v3_init_nested_paging(struct v3_vm_info *vm);
+int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo);
+int v3_deinit_nested_paging(struct v3_vm_info *vm);
+int v3_deinit_nested_paging_core(struct guest_info *core);
+
+
+// actual_start/end may be null if you don't want this info
+// If non-null, these return the actual affected GPA range
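+// pfinfo is architecture-specific: the SVM exit handler passes a pf_error_t *,
+// while the VMX exit handler passes a struct ept_exit_qual *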
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end);
+
+int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end);
-int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr);
// The range invalidated is minimally [start, end]
int v3_invalidate_nested_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end);
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end);
+
+
+
+/*****************************
+ NESTED PAGING - EVENTS
+ *****************************/
+
+struct v3_nested_pg_event {
+ enum {NESTED_PAGEFAULT,NESTED_INVALIDATE_RANGE} event_type;
+ enum {NESTED_PREIMPL, NESTED_POSTIMPL} event_order;
+ addr_t gpa; // for pf
+ pf_error_t error_code; // for pf
+ addr_t gpa_start; // for invalidation of range or page fault
+ addr_t gpa_end; // for invalidation of range or page fault (range is [start,end] )
+ // PREIMPL: start/end is the requested range
+ // POSTIMPL: start/end is the actual range invalidated
+};
+
+
+
+int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+ struct v3_nested_pg_event *,
+ void *priv_data),
+ void *priv_data);
+
+int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+                                              struct v3_nested_pg_event *,
+ void *priv_data),
+ void *priv_data);
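
/* Usage sketch (hypothetical callback; the name and body are assumptions for
   illustration only): a nested paging event callback that observes
   invalidations.  The NESTED_PREIMPL event carries the requested inclusive
   [gpa_start, gpa_end] range; the matching NESTED_POSTIMPL event carries the
   range the implementation actually invalidated, which may be wider when
   large pages are in use. */
static inline int example_nested_pg_callback(struct guest_info *core,
                                             struct v3_nested_pg_event *event,
                                             void *priv_data)
{
    if (event->event_type == NESTED_INVALIDATE_RANGE &&
        event->event_order == NESTED_POSTIMPL) {
        // [event->gpa_start, event->gpa_end] is the GPA range actually
        // invalidated in the nested page tables; flush any state derived
        // from those mappings here
    }
    return 0;
}
/* Paired with:
     v3_register_nested_paging_event_callback(vm, example_nested_pg_callback, NULL);
*/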
+
#endif // ! __V3VEE__
void (*telemetry_fn)(struct v3_vm_info * vm, void * private_data, char * hdr),
void * private_data);
+
#endif
#endif
#include <palacios/vmx_hw_info.h>
+
/* The actual format of these data structures is specified as being machine
dependent. Thus the lengths of the base address fields are defined as variable.
To be safe we assume the maximum(?) size fields
-
typedef struct vmx_eptp {
uint64_t psmt : 3; /* (0=UC, 6=WB) */
uint64_t pwl1 : 3; /* 1 less than EPT page-walk length (?)*/
uint64_t ignore2 : 12;
} __attribute__((packed)) ept_pte_t;
-int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info);
-int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual);
#endif
vmcs.o \
vmx_ctrl_regs.o \
vmx_assist.o \
- vmx_ept.o \
vmx_exits.o
pf_error_t * error_code = (pf_error_t *)&(exit_info1);
if (info->shdw_pg_mode == NESTED_PAGING) {
- if (v3_handle_nested_pagefault(info, fault_addr, *error_code) == -1) {
+ if (v3_handle_nested_pagefault(info, fault_addr, error_code, NULL, NULL) == -1) {
return -1;
}
} else {
--- /dev/null
+#ifndef V3_CONFIG_SVM
+
+
+static int handle_svm_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do nested page fault as SVM is not enabled.\n");
+ return -1;
+}
+static int handle_svm_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr as SVM is not enabled.\n");
+ return -1;
+}
+static int handle_svm_invalidate_nested_addr_range(struct guest_info * info,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr range as SVM is not enabled.\n");
+ return -1;
+}
+
+#else
+
+static int handle_svm_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ pf_error_t error_code = *((pf_error_t *) pfinfo);
+ v3_cpu_mode_t mode = v3_get_host_cpu_mode();
+
+
+ PrintDebug(info->vm_info, info, "Nested PageFault: fault_addr=%p, error_code=%u\n", (void *)fault_addr, *(uint_t *)&error_code);
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return handle_passthrough_pagefault_32(info, fault_addr, error_code, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return handle_passthrough_pagefault_64(info, fault_addr, error_code, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+ return -1;
+}
+
+
+static int handle_svm_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end) {
+
+#ifdef __V3_64BIT__
+ v3_cpu_mode_t mode = LONG;
+#else
+#error Compilation for 32 bit target detected
+ v3_cpu_mode_t mode = PROTECTED;
+#endif
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return invalidate_addr_32(info, inv_addr, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return invalidate_addr_64(info, inv_addr, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+
+ return -1;
+}
+
+static int handle_svm_invalidate_nested_addr_range(struct guest_info * info,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+
+#ifdef __V3_64BIT__
+ v3_cpu_mode_t mode = LONG;
+#else
+#error Compilation for 32 bit target detected
+ v3_cpu_mode_t mode = PROTECTED;
+#endif
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return invalidate_addr_32_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return invalidate_addr_64_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+
+ return -1;
+}
+
+#endif
return -1;
}
+ if (v3_init_passthrough_paging(vm) == -1) {
+ PrintError(vm, VCORE_NONE, "VM initialization error in passthrough paging\n");
+ return -1;
+ }
+
+ if (v3_init_nested_paging(vm) == -1) {
+ PrintError(vm, VCORE_NONE, "VM initialization error in nested paging\n");
+ return -1;
+ }
v3_init_time_vm(vm);
v3_deinit_mem_hooks(vm);
v3_delete_mem_map(vm);
v3_deinit_shdw_impl(vm);
+ v3_deinit_passthrough_paging(vm);
+ v3_deinit_nested_paging(vm);
v3_deinit_ext_manager(vm);
v3_deinit_intr_routers(vm);
#endif
if (core->shdw_pg_mode == SHADOW_PAGING) {
+ v3_init_passthrough_paging_core(core);
v3_init_shdw_pg_state(core);
+ } else {
+ //done later due to SVM/VMX differences
+ //v3_init_nested_paging_core(core);
}
v3_init_time_core(core);
if (core->shdw_pg_mode == SHADOW_PAGING) {
v3_deinit_shdw_pg_state(core);
+ v3_deinit_passthrough_paging_core(core);
+ } else {
+ v3_deinit_nested_paging_core(core);
}
v3_free_passthrough_pts(core);
v3_invalidate_shadow_pts(&(vm->cores[i]));
} else if (vm->cores[i].shdw_pg_mode==NESTED_PAGING) {
// nested invalidator uses inclusive addressing [start,end], not [start,end)
- v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1);
+ v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1,NULL,NULL);
} else {
PrintError(vm,VCORE_NONE, "Cannot determine how to invalidate paging structures! Reverting to previous region.\n");
// We'll restore things...
* All rights reserved.
*
* Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
+ * Peter Dinda <pdinda@northwestern.edu> (refactor + events)
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
#include <palacios/vmm_ctrl_regs.h>
-#ifndef V3_CONFIG_DEBUG_NESTED_PAGING
+#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
+
+/*
+
+ "Direct Paging" combines these three functionalities:
+
+ 1. Passthrough paging for SVM and VMX
+
+ Passthrough paging is used for shadow paging when
+   the guest does not have paging turned on, for example
+ when it is running in real mode or protected mode
+ early in a typical boot process. Passthrough page
+ tables are shadow page tables that are built assuming
+ the guest virtual to guest physical mapping is the identity.
+   Thus, what they implement is the GPA->HPA mapping.
+
+ Passthrough page tables are built using 32PAE paging.
+
+
+ 2. Nested paging on SVM
+
+ The SVM nested page tables have the same format as
+ regular page tables. For this reason, we can reuse
+ much of the passthrough implementation. A nested page
+ table mapping is a GPA->HPA mapping, creating a very
+   similar model to passthrough paging, except that it's
+ always active, whether the guest has paging on or not.
+
+
+ 3. Nested paging on VMX
+
+ The VMX nested page tables have a different format
+ than regular page tables. For this reason, we have
+ implemented them in the vmx_npt.h file. The code
+ here then is a wrapper, allowing us to make nested
+ paging functionality appear uniform across VMX and SVM
+ elsewhere in the codebase.
+
+*/
+
+
+
+static inline int is_vmx_nested()
+{
+ extern v3_cpu_arch_t v3_mach_type;
+
+ return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
+}
+
+static inline int is_svm_nested()
+{
+ extern v3_cpu_arch_t v3_mach_type;
+
+ return (v3_mach_type==V3_SVM_REV3_CPU);
+}
+
+
+struct passthrough_event_callback {
+ int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
+ void *priv_data;
+
+ struct list_head node;
+};
+
+
+static int have_passthrough_callbacks(struct guest_info *core)
+{
+ return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
+}
+
+static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
+{
+ struct passthrough_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(core->vm_info->passthrough_impl.event_callback_list),
+ node) {
+ cb->callback(core,event,cb->priv_data);
+ }
+}
+
+struct nested_event_callback {
+ int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
+ void *priv_data;
+
+ struct list_head node;
+};
+
+
+static int have_nested_callbacks(struct guest_info *core)
+{
+ return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
+}
+
+static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
+{
+ struct nested_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(core->vm_info->nested_impl.event_callback_list),
+ node) {
+ cb->callback(core,event,cb->priv_data);
+ }
+}
+
+
+
+
static addr_t create_generic_pt_page(struct guest_info *core) {
void * page = 0;
void *temp;
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
+
+
int v3_init_passthrough_pts(struct guest_info * info) {
info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));
return 0;
// For now... But we need to change this....
// As soon as shadow paging becomes active the passthrough tables are hosed
// So this will cause chaos if it is called at that time
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
+ dispatch_passthrough_event(info,&event);
+ }
+
struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);
shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
shadow_cr4->pae = 1;
PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
+ dispatch_passthrough_event(info,&event);
+ }
+
return 0;
}
-int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+
+int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
+	    rc=handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ return rc;
}
-int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
- v3_cpu_mode_t mode = v3_get_host_cpu_mode();
+int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end) {
+ v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
- PrintDebug(info->vm_info, info, "Nested PageFault: fault_addr=%p, error_code=%u\n", (void *)fault_addr, *(uint_t *)&error_code);
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
+ dispatch_passthrough_event(info,&event);
+ }
- switch(mode) {
- case REAL:
- case PROTECTED:
- return handle_passthrough_pagefault_32(info, fault_addr, error_code);
+ if (!actual_start) { actual_start=&start;}
+ if (!actual_end) { actual_end=&end;}
- case PROTECTED_PAE:
- return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
- case LONG:
- case LONG_32_COMPAT:
- return handle_passthrough_pagefault_64(info, fault_addr, error_code);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
- return -1;
-}
-int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr) {
- v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return invalidate_addr_32pae(info, inv_addr);
+	    rc=invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+
+ return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end) {
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end) {
v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
+
+ if (!actual_start) { actual_start=&start;}
+ if (!actual_end) { actual_end=&end;}
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end);
+	    rc=invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ return rc;
}
-int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr) {
-#ifdef __V3_64BIT__
- v3_cpu_mode_t mode = LONG;
-#else
- v3_cpu_mode_t mode = PROTECTED;
-#endif
+int v3_init_passthrough_paging(struct v3_vm_info *vm)
+{
+ INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
+ return 0;
+}
- switch(mode) {
- case REAL:
- case PROTECTED:
- return invalidate_addr_32(info, inv_addr);
+int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
+{
+ struct passthrough_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(vm->passthrough_impl.event_callback_list),
+ node) {
+ list_del(&(cb->node));
+ V3_Free(cb);
+ }
+
+ return 0;
+}
- case PROTECTED_PAE:
- return invalidate_addr_32pae(info, inv_addr);
+int v3_init_passthrough_paging_core(struct guest_info *core)
+{
+ // currently nothing to init
+ return 0;
+}
+
+int v3_deinit_passthrough_paging_core(struct guest_info *core)
+{
+ // currently nothing to deinit
+ return 0;
+}
+
+
+// inline nested paging support for Intel and AMD
+#include "svm_npt.h"
+#include "vmx_npt.h"
- case LONG:
- case LONG_32_COMPAT:
- return invalidate_addr_64(info, inv_addr);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
- return -1;
+static inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
+{
+ if (is_vmx_nested()) {
+#ifdef V3_CONFIG_VMX
+ ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
+#endif
+ } else {
+ *out = *(pf_error_t *)pfinfo;
+ }
+}
+
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+ pf_error_t err;
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+ convert_to_pf_error(pfinfo,&err);
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
+ dispatch_nested_event(info,&event);
+ }
+
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
+ } else {
+ rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
+ }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
+ dispatch_nested_event(info,&event);
+ }
+
+ return rc;
+}
+
+
+
+int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
+ dispatch_nested_event(info,&event);
+ }
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
+ } else {
+ rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
+ }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
+ dispatch_nested_event(info,&event);
+ }
+ return rc;
}
+
int v3_invalidate_nested_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end) {
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
+ dispatch_nested_event(info,&event);
+ }
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+ } else {
+ rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+ }
+
+
+ if (have_nested_callbacks(info)) {
+    struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
+ dispatch_nested_event(info,&event);
+ }
+
+ return rc;
+
+}
-#ifdef __V3_64BIT__
- v3_cpu_mode_t mode = LONG;
-#else
- v3_cpu_mode_t mode = PROTECTED;
-#endif
- switch(mode) {
- case REAL:
- case PROTECTED:
- return invalidate_addr_32_range(info, inv_addr_start, inv_addr_end);
+int v3_init_nested_paging(struct v3_vm_info *vm)
+{
+ INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
+ return 0;
+}
- case PROTECTED_PAE:
- return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end);
+int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
+{
+ if (is_vmx_nested()) {
+ return init_ept(core, (struct vmx_hw_info *) hwinfo);
+ } else {
+ // no initialization for SVM
+ return 0;
+ }
+}
+
+int v3_deinit_nested_paging(struct v3_vm_info *vm)
+{
+ struct nested_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(vm->nested_impl.event_callback_list),
+ node) {
+ list_del(&(cb->node));
+ V3_Free(cb);
+ }
+
+ return 0;
+}
- case LONG:
- case LONG_32_COMPAT:
- return invalidate_addr_64_range(info, inv_addr_start, inv_addr_end);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
+int v3_deinit_nested_paging_core(struct guest_info *core)
+{
+ // nothing to do.. probably dealloc? FIXME PAD
- return -1;
+ return 0;
}
#include <palacios/vmm_ctrl_regs.h>
+/* This always builds 2 level page tables - no large pages are used */
+
static inline int handle_passthrough_pagefault_32(struct guest_info * info,
addr_t fault_addr,
- pf_error_t error_code) {
+ pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
+
// Check to see if pde and pte exist (create them if not)
pde32_t * pde = NULL;
pte32_t * pte = NULL;
}
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte32_t *)create_generic_pt_page(info);
}
-static inline int invalidate_addr_32(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_32(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
-
- return invalidate_addr_32_internal(core,inv_addr,&start,&len);
+ int rc;
+
+ rc = invalidate_addr_32_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
}
-static inline int invalidate_addr_32_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_32_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_32_internal(core,next,&start, &len);
- if (rc) {
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
+ if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
+/* This always builds 3 level page tables - no large pages */
static inline int handle_passthrough_pagefault_32pae(struct guest_info * info,
addr_t fault_addr,
- pf_error_t error_code) {
+ pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
pdpe32pae_t * pdpe = NULL;
pde32pae_t * pde = NULL;
pte32pae_t * pte = NULL;
}
PrintDebug(info->vm_info, info, "Handling pde error pd base address =%p\n", (void *)pde);
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte32pae_t *)create_generic_pt_page(info);
}
PrintDebug(info->vm_info, info, "Handling pte error pt base address=%p\n", (void *)pte);
+
+
// Fix up the PTE entry
if (pte[pte_index].present == 0) {
pte[pte_index].user_page = 1;
-static inline int invalidate_addr_32pae(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_32pae(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
+ int rc;
- return invalidate_addr_32pae_internal(core,inv_addr,&start,&len);
+ rc = invalidate_addr_32pae_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
+
+
}
-static inline int invalidate_addr_32pae_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_32pae_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_32pae_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
+/* this always builds 4 level page tables, but large pages are allowed */
+
// Reference: AMD Software Developer Manual Vol.2 Ch.5 "Page Translation and Protection"
-static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
pml4e64_t * pml = NULL;
pdpe64_t * pdpe = NULL;
pde64_t * pde = NULL;
pde2mb = (pde64_2MB_t *)pde; // all but these two lines are the same for PTE
pde2mb[pde_index].large_page = 1;
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr)+1)-1;
+
if (pde2mb[pde_index].present == 0) {
pde2mb[pde_index].user_page = 1;
// Continue with the 4KiB page heirarchy
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte64_t *)create_generic_pt_page(core);
return 0;
}
-static inline int invalidate_addr_64(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_64(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
+ int rc;
- return invalidate_addr_64_internal(core,inv_addr,&start,&len);
+ rc = invalidate_addr_64_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
}
-static inline int invalidate_addr_64_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_64_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_64_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
if (mem_mode == PHYSICAL_MEM) {
- rc |= v3_invalidate_passthrough_addr_range(info, region->guest_start, region->guest_end-1);
+ rc |= v3_invalidate_passthrough_addr_range(info, region->guest_start, region->guest_end-1,NULL,NULL);
} else {
rc |= v3_invalidate_shadow_pts(info);
}
} else if (info->shdw_pg_mode == NESTED_PAGING) {
- rc |= v3_invalidate_nested_addr_range(info, region->guest_start, region->guest_end-1);
+ rc |= v3_invalidate_nested_addr_range(info, region->guest_start, region->guest_end-1,NULL,NULL);
}
}
v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
if (mem_mode == PHYSICAL_MEM) {
- rc |= v3_invalidate_passthrough_addr_range(info,reg->guest_start, reg->guest_end-1);
+ rc |= v3_invalidate_passthrough_addr_range(info,reg->guest_start, reg->guest_end-1,NULL,NULL);
} else {
rc |= v3_invalidate_shadow_pts(info);
}
} else if (info->shdw_pg_mode == NESTED_PAGING) {
- rc |= v3_invalidate_nested_addr_range(info,reg->guest_start, reg->guest_end-1);
+ rc |= v3_invalidate_nested_addr_range(info,reg->guest_start, reg->guest_end-1,NULL,NULL);
}
}
if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) {
// If paging is not turned on we need to handle the special cases
- rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code);
+ rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code,NULL,NULL);
} else if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
struct v3_shdw_pg_impl * impl = state->current_impl;
- if (v3_init_ept(core, &hw_info) == -1) {
+ if (v3_init_nested_paging_core(core, &hw_info) == -1) {
PrintError(core->vm_info, core, "Error initializing EPT\n");
return -1;
}
((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->cd = 0;
- if (v3_init_ept(core, &hw_info) == -1) {
+ if (v3_init_nested_paging_core(core, &hw_info) == -1) {
PrintError(core->vm_info, core, "Error initializing EPT\n");
return -1;
}
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
- * All rights reserved.
- *
- * Author: Jack Lange <jacklange@cs.pitt.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vmx_ept.h>
-#include <palacios/vmx_lowlevel.h>
-#include <palacios/vmm_paging.h>
-#include <palacios/vm_guest_mem.h>
-
-
-static struct vmx_ept_msr * ept_info = NULL;
-
-
-static addr_t create_ept_page() {
- void * temp;
- void * page = 0;
-
- temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
- if (!temp) {
- PrintError(VM_NONE, VCORE_NONE, "Cannot allocate EPT page\n");
- return 0;
- }
- page = V3_VAddr(temp);
- memset(page, 0, PAGE_SIZE);
-
- return (addr_t)page;
-}
-
-
-
-
-int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info) {
- addr_t ept_pa = (addr_t)V3_PAddr((void *)create_ept_page());
- vmx_eptp_t * ept_ptr = (vmx_eptp_t *)&(core->direct_map_pt);
-
-
- ept_info = &(hw_info->ept_info);
-
- /* TODO: Should we set this to WB?? */
- ept_ptr->psmt = 0;
-
- if (ept_info->pg_walk_len4) {
- ept_ptr->pwl1 = 3;
- } else {
- PrintError(core->vm_info, core, "Unsupported EPT Table depth\n");
- return -1;
- }
-
- ept_ptr->pml_base_addr = PAGE_BASE_ADDR(ept_pa);
-
-
- return 0;
-}
-
-
-/* We can use the default paging macros, since the formats are close enough to allow it */
-
-int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual) {
- ept_pml4_t * pml = NULL;
- // ept_pdp_1GB_t * pdpe1gb = NULL;
- ept_pdp_t * pdpe = NULL;
- ept_pde_2MB_t * pde2mb = NULL;
- ept_pde_t * pde = NULL;
- ept_pte_t * pte = NULL;
- addr_t host_addr = 0;
-
- int pml_index = PML4E64_INDEX(fault_addr);
- int pdpe_index = PDPE64_INDEX(fault_addr);
- int pde_index = PDE64_INDEX(fault_addr);
- int pte_index = PTE64_INDEX(fault_addr);
-
- struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->vcpu_id, fault_addr);
- int page_size = PAGE_SIZE_4KB;
-
-
-
- pf_error_t error_code = {0};
- error_code.present = ept_qual->present;
- error_code.write = ept_qual->write;
-
- if (region == NULL) {
- PrintError(core->vm_info, core, "invalid region, addr=%p\n", (void *)fault_addr);
- return -1;
- }
-
- if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
- page_size = v3_get_max_page_size(core, fault_addr, LONG);
- }
-
-
-
- pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
-
-
-
- //Fix up the PML entry
- if (pml[pml_index].read == 0) {
- pdpe = (ept_pdp_t *)create_ept_page();
-
- // Set default PML Flags...
- pml[pml_index].read = 1;
- pml[pml_index].write = 1;
- pml[pml_index].exec = 1;
-
- pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
- } else {
- pdpe = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
- }
-
-
- // Fix up the PDPE entry
- if (pdpe[pdpe_index].read == 0) {
- pde = (ept_pde_t *)create_ept_page();
-
- // Set default PDPE Flags...
- pdpe[pdpe_index].read = 1;
- pdpe[pdpe_index].write = 1;
- pdpe[pdpe_index].exec = 1;
-
- pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
- } else {
- pde = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
- }
-
-
-
- // Fix up the 2MiB PDE and exit here
- if (page_size == PAGE_SIZE_2MB) {
- pde2mb = (ept_pde_2MB_t *)pde; // all but these two lines are the same for PTE
- pde2mb[pde_index].large_page = 1;
-
- if (pde2mb[pde_index].read == 0) {
-
- if ( (region->flags.alloced == 1) &&
- (region->flags.read == 1)) {
- // Full access
- pde2mb[pde_index].read = 1;
- pde2mb[pde_index].exec = 1;
- pde2mb[pde_index].ipat = 1;
- pde2mb[pde_index].mt = 6;
-
- if (region->flags.write == 1) {
- pde2mb[pde_index].write = 1;
- } else {
- pde2mb[pde_index].write = 0;
- }
-
- if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
- PrintError(core->vm_info, core, "Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
- return -1;
- }
-
- pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
- } else {
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
- } else {
- // We fix all permissions on the first pass,
- // so we only get here if its an unhandled exception
-
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
-
- return 0;
- }
-
- // Continue with the 4KiB page heirarchy
-
-
- // Fix up the PDE entry
- if (pde[pde_index].read == 0) {
- pte = (ept_pte_t *)create_ept_page();
-
- pde[pde_index].read = 1;
- pde[pde_index].write = 1;
- pde[pde_index].exec = 1;
-
- pde[pde_index].pt_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pte));
- } else {
- pte = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
- }
-
-
-
-
- // Fix up the PTE entry
- if (pte[pte_index].read == 0) {
-
- if ( (region->flags.alloced == 1) &&
- (region->flags.read == 1)) {
- // Full access
- pte[pte_index].read = 1;
- pte[pte_index].exec = 1;
- pte[pte_index].ipat = 1;
- pte[pte_index].mt = 6;
-
- if (region->flags.write == 1) {
- pte[pte_index].write = 1;
- } else {
- pte[pte_index].write = 0;
- }
-
- if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
- PrintError(core->vm_info, core, "Error Could not translate fault addr (%p)\n", (void *)fault_addr);
- return -1;
- }
-
-
- pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
- } else {
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
- } else {
- // We fix all permissions on the first pass,
- // so we only get here if its an unhandled exception
-
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
-
-
- return 0;
-}
case VMX_EXIT_EPT_VIOLATION: {
struct ept_exit_qual * ept_qual = (struct ept_exit_qual *)&(exit_info->exit_qual);
- if (v3_handle_ept_fault(info, exit_info->ept_fault_addr, ept_qual) == -1) {
+ if (v3_handle_nested_pagefault(info, exit_info->ept_fault_addr, ept_qual,NULL,NULL) == -1) {
PrintError(info->vm_info, info, "Error handling EPT fault\n");
return -1;
}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu> (implementation)
+ * Peter Dinda <pdinda@northwestern.edu> (invalidation)
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmx_ept.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vm_guest_mem.h>
+
+
+/*
+
+   Note that the Intel nested page tables have a slightly different format
+ than regular page tables. Also note that our implementation
+ uses only 64 bit (4 level) page tables. This is unlike the SVM
+ nested paging implementation.
+
+
+*/
+
+#ifndef V3_CONFIG_VMX
+
+
+static int handle_vmx_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+                                       addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do nested page fault as VMX is not enabled.\n");
+ return -1;
+}
+static int handle_vmx_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+                                             addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr as VMX is not enabled.\n");
+ return -1;
+}
+static int handle_vmx_invalidate_nested_addr_range(struct guest_info * info,
+                                                   addr_t inv_addr_start, addr_t inv_addr_end,
+                                                   addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr range as VMX is not enabled.\n");
+ return -1;
+}
+
+#else
+
+static struct vmx_ept_msr * ept_info = NULL;
+
+
+static addr_t create_ept_page() {
+ void * temp;
+ void * page = 0;
+
+ temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
+ if (!temp) {
+ PrintError(VM_NONE, VCORE_NONE, "Cannot allocate EPT page\n");
+ return 0;
+ }
+ page = V3_VAddr(temp);
+ memset(page, 0, PAGE_SIZE);
+
+ return (addr_t)page;
+}
+
+
+
+
+static int init_ept(struct guest_info * core, struct vmx_hw_info * hw_info) {
+ addr_t ept_pa = (addr_t)V3_PAddr((void *)create_ept_page());
+ vmx_eptp_t * ept_ptr = (vmx_eptp_t *)&(core->direct_map_pt);
+
+
+ ept_info = &(hw_info->ept_info);
+
+ /* TODO: Should we set this to WB?? */
+ ept_ptr->psmt = 0;
+
+ if (ept_info->pg_walk_len4) {
+ ept_ptr->pwl1 = 3;
+ } else {
+ PrintError(core->vm_info, core, "Unsupported EPT Table depth\n");
+ return -1;
+ }
+
+ ept_ptr->pml_base_addr = PAGE_BASE_ADDR(ept_pa);
+
+
+ return 0;
+}
+
+
+static inline void ept_exit_qual_to_pf_error(struct ept_exit_qual *qual, pf_error_t *error)
+{
+ memset(error,0,sizeof(pf_error_t));
+ error->present = qual->present;
+ error->write = qual->write;
+ error->ifetch = qual->ifetch;
+}
+
+
+/* We can use the default paging macros, since the formats are close enough to allow it */
+
+
+static int handle_vmx_nested_pagefault(struct guest_info * core, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end )
+{
+ struct ept_exit_qual * ept_qual = (struct ept_exit_qual *) pfinfo;
+ ept_pml4_t * pml = NULL;
+ // ept_pdp_1GB_t * pdpe1gb = NULL;
+ ept_pdp_t * pdpe = NULL;
+ ept_pde_2MB_t * pde2mb = NULL;
+ ept_pde_t * pde = NULL;
+ ept_pte_t * pte = NULL;
+ addr_t host_addr = 0;
+
+ int pml_index = PML4E64_INDEX(fault_addr);
+ int pdpe_index = PDPE64_INDEX(fault_addr);
+ int pde_index = PDE64_INDEX(fault_addr);
+ int pte_index = PTE64_INDEX(fault_addr);
+
+ struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->vcpu_id, fault_addr);
+ int page_size = PAGE_SIZE_4KB;
+
+
+ pf_error_t error_code;
+
+ ept_exit_qual_to_pf_error(ept_qual, &error_code);
+
+  PrintDebug(core->vm_info, core, "Nested PageFault: fault_addr=%p, error_code=%u, exit_qual=0x%llx\n", (void *)fault_addr, *(uint_t *)&error_code, ept_qual->value);
+
+
+ if (region == NULL) {
+ PrintError(core->vm_info, core, "invalid region, addr=%p\n", (void *)fault_addr);
+ return -1;
+ }
+
+ if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
+ page_size = v3_get_max_page_size(core, fault_addr, LONG);
+ }
+
+
+
+ pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
+
+
+
+ //Fix up the PML entry
+ if (pml[pml_index].read == 0) {
+ pdpe = (ept_pdp_t *)create_ept_page();
+
+ // Set default PML Flags...
+ pml[pml_index].read = 1;
+ pml[pml_index].write = 1;
+ pml[pml_index].exec = 1;
+
+ pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
+ } else {
+ pdpe = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
+ }
+
+
+ // Fix up the PDPE entry
+ if (pdpe[pdpe_index].read == 0) {
+ pde = (ept_pde_t *)create_ept_page();
+
+ // Set default PDPE Flags...
+ pdpe[pdpe_index].read = 1;
+ pdpe[pdpe_index].write = 1;
+ pdpe[pdpe_index].exec = 1;
+
+ pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
+ } else {
+ pde = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
+ }
+
+
+
+ // Fix up the 2MiB PDE and exit here
+ if (page_size == PAGE_SIZE_2MB) {
+ pde2mb = (ept_pde_2MB_t *)pde; // all but these two lines are the same for PTE
+ pde2mb[pde_index].large_page = 1;
+
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr)+1)-1;
+
+ if (pde2mb[pde_index].read == 0) {
+
+ if ( (region->flags.alloced == 1) &&
+ (region->flags.read == 1)) {
+ // Full access
+ pde2mb[pde_index].read = 1;
+ pde2mb[pde_index].exec = 1;
+ pde2mb[pde_index].ipat = 1;
+ pde2mb[pde_index].mt = 6;
+
+ if (region->flags.write == 1) {
+ pde2mb[pde_index].write = 1;
+ } else {
+ pde2mb[pde_index].write = 0;
+ }
+
+ if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+ PrintError(core->vm_info, core, "Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
+ return -1;
+ }
+
+ pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
+ } else {
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+ } else {
+ // We fix all permissions on the first pass,
+ // so we only get here if its an unhandled exception
+
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+
+ return 0;
+ }
+
+    // Continue with the 4KiB page hierarchy
+
+
+ // Fix up the PDE entry
+ if (pde[pde_index].read == 0) {
+ pte = (ept_pte_t *)create_ept_page();
+
+ pde[pde_index].read = 1;
+ pde[pde_index].write = 1;
+ pde[pde_index].exec = 1;
+
+ pde[pde_index].pt_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pte));
+ } else {
+ pte = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
+ }
+
+
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
+
+ // Fix up the PTE entry
+ if (pte[pte_index].read == 0) {
+
+ if ( (region->flags.alloced == 1) &&
+ (region->flags.read == 1)) {
+ // Full access
+ pte[pte_index].read = 1;
+ pte[pte_index].exec = 1;
+ pte[pte_index].ipat = 1;
+ pte[pte_index].mt = 6;
+
+ if (region->flags.write == 1) {
+ pte[pte_index].write = 1;
+ } else {
+ pte[pte_index].write = 0;
+ }
+
+ if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+ PrintError(core->vm_info, core, "Error Could not translate fault addr (%p)\n", (void *)fault_addr);
+ return -1;
+ }
+
+
+ pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
+ } else {
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+ } else {
+ // We fix all permissions on the first pass,
+ // so we only get here if its an unhandled exception
+
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+
+
+ return 0;
+}
+
+
+static int handle_vmx_invalidate_nested_addr_internal(struct guest_info *core, addr_t inv_addr,
+ addr_t *actual_start, uint64_t *actual_size) {
+ ept_pml4_t *pml = NULL;
+ ept_pdp_t *pdpe = NULL;
+ ept_pde_t *pde = NULL;
+ ept_pte_t *pte = NULL;
+
+
+
+ // clear the page table entry
+
+ int pml_index = PML4E64_INDEX(inv_addr);
+ int pdpe_index = PDPE64_INDEX(inv_addr);
+ int pde_index = PDE64_INDEX(inv_addr);
+ int pte_index = PTE64_INDEX(inv_addr);
+
+
+ pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
+
+
+ // note that there are no present bits in EPT, so we
+ // use the read bit to signify this.
+ // either an entry is read/write/exec or it is none of these
+
+ if (pml[pml_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_512GB(PAGE_BASE_ADDR_512GB(inv_addr));
+ *actual_size = PAGE_SIZE_512GB;
+ return 0;
+ }
+
+ pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
+
+ if (pdpe[pdpe_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_1GB(PAGE_BASE_ADDR_1GB(inv_addr));
+ *actual_size = PAGE_SIZE_1GB;
+ return 0;
+ } else if (pdpe[pdpe_index].large_page == 1) { // 1GiB
+ pdpe[pdpe_index].read = 0;
+ pdpe[pdpe_index].write = 0;
+ pdpe[pdpe_index].exec = 0;
+ *actual_start = BASE_TO_PAGE_ADDR_1GB(PAGE_BASE_ADDR_1GB(inv_addr));
+ *actual_size = PAGE_SIZE_1GB;
+ return 0;
+ }
+
+ pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
+
+ if (pde[pde_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(inv_addr));
+ *actual_size = PAGE_SIZE_2MB;
+ return 0;
+ } else if (pde[pde_index].large_page == 1) { // 2MiB
+ pde[pde_index].read = 0;
+ pde[pde_index].write = 0;
+ pde[pde_index].exec = 0;
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(inv_addr));
+ *actual_size = PAGE_SIZE_2MB;
+ return 0;
+ }
+
+ pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
+
+ pte[pte_index].read = 0; // 4KiB
+ pte[pte_index].write = 0;
+ pte[pte_index].exec = 0;
+
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(inv_addr));
+ *actual_size = PAGE_SIZE_4KB;
+
+ return 0;
+}
+
+
+static int handle_vmx_invalidate_nested_addr(struct guest_info *core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ uint64_t len;
+ int rc;
+
+ rc = handle_vmx_invalidate_nested_addr_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
+}
+
+
+static int handle_vmx_invalidate_nested_addr_range(struct guest_info *core,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ addr_t next;
+ addr_t start;
+ uint64_t len;
+ int rc;
+
+ for (next=inv_addr_start; next<=inv_addr_end; ) {
+ rc = handle_vmx_invalidate_nested_addr_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
+ if (rc) {
+ return rc;
+ }
+ next = start + len;
+ *actual_end = next;
+ }
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
+ return 0;
+}
+
+#endif