void * sched_priv_data;
v3_paging_mode_t shdw_pg_mode;
+ // arch-independent state of shadow pager
struct v3_shdw_pg_state shdw_pg_state;
- //struct v3_nested_pg_state nested_pg_state;
+    // arch-independent state of the passthrough pager
addr_t direct_map_pt;
+ // arch-independent state of the nested pager (currently none)
+ // struct v3_nested_pg_state nested_pg_state;
union {
struct v3_fp_state fp_state;
+ // the arch-dependent state (SVM or VMX)
void * vmm_data;
uint64_t yield_start_cycle;
struct v3_mem_hooks mem_hooks;
+    // arch-independent state of shadow pager
struct v3_shdw_impl_state shdw_impl;
- //struct v3_nested_impl_state nested_impl;
+    // arch-independent state of passthrough pager
+ struct v3_passthrough_impl_state passthrough_impl;
+ // arch-independent state of the nested pager
+ struct v3_nested_impl_state nested_impl;
void * sched_priv_data;
struct v3_io_map io_map;
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
+#include <palacios/vmm_list.h>
+
+
+/**********************************
+ PASSTHROUGH PAGING - CORE FUNC
+ **********************************/
+
+
+struct v3_passthrough_impl_state {
+ // currently there is only a single implementation
+ // that internally includes SVM and VMX support
+ // The externally visible state is just the callbacks
+ struct list_head event_callback_list;
+};
+
+
+int v3_init_passthrough_paging(struct v3_vm_info *vm);
+int v3_init_passthrough_paging_core(struct guest_info *core);
+int v3_deinit_passthrough_paging(struct v3_vm_info *vm);
+int v3_deinit_passthrough_paging_core(struct guest_info *core);
int v3_init_passthrough_pts(struct guest_info * guest_info);
int v3_free_passthrough_pts(struct guest_info * core);
int v3_reset_passthrough_pts(struct guest_info * guest_info);
-int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
-int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code);
+// actual_start/end may be null if you don't want this info
+// If non-null, these return the actual affected GPA range
+int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end);
int v3_activate_passthrough_pt(struct guest_info * info);
-int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr);
+int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end);
+
// The range invalidated is minimally [start, end]
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end);
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end);
+
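/* Usage sketch (hypothetical helper; the function name and "spilled" flag are
   assumptions for illustration only): invalidate the inclusive GPA range
   [gpa_start, gpa_end] and report whether the implementation had to invalidate
   more than was asked for, which can happen when a large-page mapping is
   dropped.  Callers that do not care about the actual range pass NULL,NULL. */
static inline int example_invalidate_exact(struct guest_info *core,
                                           addr_t gpa_start, addr_t gpa_end,
                                           int *spilled)
{
    addr_t actual_start = 0;
    addr_t actual_end   = 0;

    if (v3_invalidate_passthrough_addr_range(core, gpa_start, gpa_end,
                                             &actual_start, &actual_end) == -1) {
        return -1;
    }

    // the out-parameters bound the GPA range that was actually invalidated
    *spilled = (actual_start < gpa_start) || (actual_end > gpa_end);

    return 0;
}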
+/**********************************
+ PASSTHROUGH PAGING - EVENTS
+ **********************************/
+
+struct v3_passthrough_pg_event {
+ enum {PASSTHROUGH_PAGEFAULT,PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_ACTIVATE} event_type;
+ enum {PASSTHROUGH_PREIMPL, PASSTHROUGH_POSTIMPL} event_order;
+ addr_t gpa; // for pf
+ pf_error_t error_code; // for pf
+ addr_t gpa_start; // for invalidation of range or page fault
+ addr_t gpa_end; // for invalidation of range or page fault (range is [start,end] )
+ // PREIMPL: start/end is the requested range
+ // POSTIMPL: start/end is the actual range invalidated
+};
+
+
+
+int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+ struct v3_passthrough_pg_event *,
+ void *priv_data),
+ void *priv_data);
+
+int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+                                                     struct v3_passthrough_pg_event *,
+ void *priv_data),
+ void *priv_data);
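
/* Usage sketch (hypothetical callback and counter; names are assumptions for
   illustration only): a callback registered via the call above is invoked
   twice per operation - once with PASSTHROUGH_PREIMPL before the
   implementation runs, and once with PASSTHROUGH_POSTIMPL afterwards, when
   gpa_start/gpa_end hold the GPA range actually affected. */
static inline int example_passthrough_pg_callback(struct guest_info *core,
                                                  struct v3_passthrough_pg_event *event,
                                                  void *priv_data)
{
    if (event->event_type == PASSTHROUGH_PAGEFAULT &&
        event->event_order == PASSTHROUGH_POSTIMPL) {
        // count completed passthrough page faults; priv_data is whatever
        // pointer was supplied at registration time (here: a uint64_t counter)
        (*(uint64_t *)priv_data)++;
    }
    return 0;
}
/* Registered once per VM, e.g. during setup:
     v3_register_passthrough_paging_event_callback(vm, example_passthrough_pg_callback, &counter);
*/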
+
+
+
+/*****************************
+ NESTED PAGING - CORE FUNC
+ *****************************/
+
+
+struct v3_nested_impl_state {
+ // currently there is only a single implementation
+ // that internally includes SVM and VMX support
+ // The externally visible state is just the callbacks
+ struct list_head event_callback_list;
+};
+
+int v3_init_nested_paging(struct v3_vm_info *vm);
+int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo);
+int v3_deinit_nested_paging(struct v3_vm_info *vm);
+int v3_deinit_nested_paging_core(struct guest_info *core);
+
+
+// actual_start/end may be null if you don't want this info
+// If non-null, these return the actual affected GPA range
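+// pfinfo is architecture-specific: the SVM exit handler passes a pf_error_t *,
+// while the VMX exit handler passes a struct ept_exit_qual *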
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end);
+
+int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end);
-int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr);
// The range invalidated is minimally [start, end]
int v3_invalidate_nested_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end);
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end);
+
+
+
+/*****************************
+ NESTED PAGING - EVENTS
+ *****************************/
+
+struct v3_nested_pg_event {
+ enum {NESTED_PAGEFAULT,NESTED_INVALIDATE_RANGE} event_type;
+ enum {NESTED_PREIMPL, NESTED_POSTIMPL} event_order;
+ addr_t gpa; // for pf
+ pf_error_t error_code; // for pf
+ addr_t gpa_start; // for invalidation of range or page fault
+ addr_t gpa_end; // for invalidation of range or page fault (range is [start,end] )
+ // PREIMPL: start/end is the requested range
+ // POSTIMPL: start/end is the actual range invalidated
+};
+
+
+
+int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+ struct v3_nested_pg_event *,
+ void *priv_data),
+ void *priv_data);
+
+int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
+ int (*callback)(struct guest_info *core,
+                                              struct v3_nested_pg_event *,
+ void *priv_data),
+ void *priv_data);
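
/* Usage sketch (hypothetical callback; the name and body are assumptions for
   illustration only): a nested paging event callback that observes
   invalidations.  The NESTED_PREIMPL event carries the requested inclusive
   [gpa_start, gpa_end] range; the matching NESTED_POSTIMPL event carries the
   range the implementation actually invalidated, which may be wider when
   large pages are in use. */
static inline int example_nested_pg_callback(struct guest_info *core,
                                             struct v3_nested_pg_event *event,
                                             void *priv_data)
{
    if (event->event_type == NESTED_INVALIDATE_RANGE &&
        event->event_order == NESTED_POSTIMPL) {
        // [event->gpa_start, event->gpa_end] is the GPA range actually
        // invalidated in the nested page tables; flush any state derived
        // from those mappings here
    }
    return 0;
}
/* Paired with:
     v3_register_nested_paging_event_callback(vm, example_nested_pg_callback, NULL);
*/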
+
#endif // ! __V3VEE__
void (*telemetry_fn)(struct v3_vm_info * vm, void * private_data, char * hdr),
void * private_data);
+
#endif
#endif
#include <palacios/vmx_hw_info.h>
+
/* The actual format of these data structures is specified as being machine
dependent. Thus the lengths of the base address fields are defined as variable.
To be safe we assume the maximum(?) size fields
-
typedef struct vmx_eptp {
uint64_t psmt : 3; /* (0=UC, 6=WB) */
uint64_t pwl1 : 3; /* 1 less than EPT page-walk length (?)*/
uint64_t ignore2 : 12;
} __attribute__((packed)) ept_pte_t;
-int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info);
-int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual);
#endif
vmcs.o \
vmx_ctrl_regs.o \
vmx_assist.o \
- vmx_ept.o \
vmx_exits.o
pf_error_t * error_code = (pf_error_t *)&(exit_info1);
if (info->shdw_pg_mode == NESTED_PAGING) {
- if (v3_handle_nested_pagefault(info, fault_addr, *error_code) == -1) {
+ if (v3_handle_nested_pagefault(info, fault_addr, error_code, NULL, NULL) == -1) {
return -1;
}
} else {
--- /dev/null
+#ifndef V3_CONFIG_SVM
+
+
+static int handle_svm_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do nested page fault as SVM is not enabled.\n");
+ return -1;
+}
+static int handle_svm_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr as SVM is not enabled.\n");
+ return -1;
+}
+static int handle_svm_invalidate_nested_addr_range(struct guest_info * info,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr range as SVM is not enabled.\n");
+ return -1;
+}
+
+#else
+
+static int handle_svm_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ pf_error_t error_code = *((pf_error_t *) pfinfo);
+ v3_cpu_mode_t mode = v3_get_host_cpu_mode();
+
+
+ PrintDebug(info->vm_info, info, "Nested PageFault: fault_addr=%p, error_code=%u\n", (void *)fault_addr, *(uint_t *)&error_code);
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return handle_passthrough_pagefault_32(info, fault_addr, error_code, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return handle_passthrough_pagefault_64(info, fault_addr, error_code, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+ return -1;
+}
+
+
+static int handle_svm_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end) {
+
+#ifdef __V3_64BIT__
+ v3_cpu_mode_t mode = LONG;
+#else
+#error Compilation for 32 bit target detected
+ v3_cpu_mode_t mode = PROTECTED;
+#endif
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return invalidate_addr_32(info, inv_addr, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return invalidate_addr_64(info, inv_addr, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+
+ return -1;
+}
+
+static int handle_svm_invalidate_nested_addr_range(struct guest_info * info,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+
+#ifdef __V3_64BIT__
+ v3_cpu_mode_t mode = LONG;
+#else
+#error Compilation for 32 bit target detected
+ v3_cpu_mode_t mode = PROTECTED;
+#endif
+
+ switch(mode) {
+ case REAL:
+ case PROTECTED:
+ return invalidate_addr_32_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ case PROTECTED_PAE:
+ return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ case LONG:
+ case LONG_32_COMPAT:
+ return invalidate_addr_64_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+
+ default:
+ PrintError(info->vm_info, info, "Unknown CPU Mode\n");
+ break;
+ }
+
+ return -1;
+}
+
+#endif
return -1;
}
+ if (v3_init_passthrough_paging(vm) == -1) {
+ PrintError(vm, VCORE_NONE, "VM initialization error in passthrough paging\n");
+ return -1;
+ }
+
+ if (v3_init_nested_paging(vm) == -1) {
+ PrintError(vm, VCORE_NONE, "VM initialization error in nested paging\n");
+ return -1;
+ }
v3_init_time_vm(vm);
v3_deinit_mem_hooks(vm);
v3_delete_mem_map(vm);
v3_deinit_shdw_impl(vm);
+ v3_deinit_passthrough_paging(vm);
+ v3_deinit_nested_paging(vm);
v3_deinit_ext_manager(vm);
v3_deinit_intr_routers(vm);
#endif
if (core->shdw_pg_mode == SHADOW_PAGING) {
+ v3_init_passthrough_paging_core(core);
v3_init_shdw_pg_state(core);
+ } else {
+ //done later due to SVM/VMX differences
+ //v3_init_nested_paging_core(core);
}
v3_init_time_core(core);
if (core->shdw_pg_mode == SHADOW_PAGING) {
v3_deinit_shdw_pg_state(core);
+ v3_deinit_passthrough_paging_core(core);
+ } else {
+ v3_deinit_nested_paging_core(core);
}
v3_free_passthrough_pts(core);
v3_invalidate_shadow_pts(&(vm->cores[i]));
} else if (vm->cores[i].shdw_pg_mode==NESTED_PAGING) {
// nested invalidator uses inclusive addressing [start,end], not [start,end)
- v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1);
+ v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1,NULL,NULL);
} else {
PrintError(vm,VCORE_NONE, "Cannot determine how to invalidate paging structures! Reverting to previous region.\n");
// We'll restore things...
* All rights reserved.
*
* Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
+ * Peter Dinda <pdinda@northwestern.edu> (refactor + events)
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
#include <palacios/vmm_ctrl_regs.h>
-#ifndef V3_CONFIG_DEBUG_NESTED_PAGING
+#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
+
+/*
+
+ "Direct Paging" combines these three functionalities:
+
+ 1. Passthrough paging for SVM and VMX
+
+ Passthrough paging is used for shadow paging when
+   the guest does not have paging turned on, for example
+ when it is running in real mode or protected mode
+ early in a typical boot process. Passthrough page
+ tables are shadow page tables that are built assuming
+ the guest virtual to guest physical mapping is the identity.
+   Thus, what they implement is the GPA->HPA mapping.
+
+ Passthrough page tables are built using 32PAE paging.
+
+
+ 2. Nested paging on SVM
+
+ The SVM nested page tables have the same format as
+ regular page tables. For this reason, we can reuse
+ much of the passthrough implementation. A nested page
+ table mapping is a GPA->HPA mapping, creating a very
+   similar model to passthrough paging, except that it's
+ always active, whether the guest has paging on or not.
+
+
+ 3. Nested paging on VMX
+
+ The VMX nested page tables have a different format
+ than regular page tables. For this reason, we have
+ implemented them in the vmx_npt.h file. The code
+ here then is a wrapper, allowing us to make nested
+ paging functionality appear uniform across VMX and SVM
+ elsewhere in the codebase.
+
+*/
+
+
+
+static inline int is_vmx_nested()
+{
+ extern v3_cpu_arch_t v3_mach_type;
+
+ return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
+}
+
+static inline int is_svm_nested()
+{
+ extern v3_cpu_arch_t v3_mach_type;
+
+ return (v3_mach_type==V3_SVM_REV3_CPU);
+}
+
+
+struct passthrough_event_callback {
+ int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
+ void *priv_data;
+
+ struct list_head node;
+};
+
+
+static int have_passthrough_callbacks(struct guest_info *core)
+{
+ return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
+}
+
+static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
+{
+ struct passthrough_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(core->vm_info->passthrough_impl.event_callback_list),
+ node) {
+ cb->callback(core,event,cb->priv_data);
+ }
+}
+
+struct nested_event_callback {
+ int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
+ void *priv_data;
+
+ struct list_head node;
+};
+
+
+static int have_nested_callbacks(struct guest_info *core)
+{
+ return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
+}
+
+static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
+{
+ struct nested_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(core->vm_info->nested_impl.event_callback_list),
+ node) {
+ cb->callback(core,event,cb->priv_data);
+ }
+}
+
+
+
+
static addr_t create_generic_pt_page(struct guest_info *core) {
void * page = 0;
void *temp;
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
+
+
int v3_init_passthrough_pts(struct guest_info * info) {
info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));
return 0;
// For now... But we need to change this....
// As soon as shadow paging becomes active the passthrough tables are hosed
// So this will cause chaos if it is called at that time
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
+ dispatch_passthrough_event(info,&event);
+ }
+
struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);
shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
shadow_cr4->pae = 1;
PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
+ dispatch_passthrough_event(info,&event);
+ }
+
return 0;
}
-int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
+
+int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
+	    rc=handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ return rc;
}
-int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) {
- v3_cpu_mode_t mode = v3_get_host_cpu_mode();
+int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end) {
+ v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
- PrintDebug(info->vm_info, info, "Nested PageFault: fault_addr=%p, error_code=%u\n", (void *)fault_addr, *(uint_t *)&error_code);
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
+ dispatch_passthrough_event(info,&event);
+ }
- switch(mode) {
- case REAL:
- case PROTECTED:
- return handle_passthrough_pagefault_32(info, fault_addr, error_code);
+ if (!actual_start) { actual_start=&start;}
+ if (!actual_end) { actual_end=&end;}
- case PROTECTED_PAE:
- return handle_passthrough_pagefault_32pae(info, fault_addr, error_code);
- case LONG:
- case LONG_32_COMPAT:
- return handle_passthrough_pagefault_64(info, fault_addr, error_code);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
- return -1;
-}
-int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr) {
- v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return invalidate_addr_32pae(info, inv_addr);
+	    rc=invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+
+ return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end) {
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end) {
v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
+ addr_t start, end;
+ int rc;
+
+ if (!actual_start) { actual_start=&start;}
+ if (!actual_end) { actual_end=&end;}
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ rc=-1;
switch(mode) {
case REAL:
case LONG:
case LONG_32_COMPAT:
// Long mode will only use 32PAE page tables...
- return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end);
+	    rc=invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+	    break;
default:
PrintError(info->vm_info, info, "Unknown CPU Mode\n");
break;
}
- return -1;
+
+ if (have_passthrough_callbacks(info)) {
+ struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
+ dispatch_passthrough_event(info,&event);
+ }
+
+ return rc;
}
-int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr) {
-#ifdef __V3_64BIT__
- v3_cpu_mode_t mode = LONG;
-#else
- v3_cpu_mode_t mode = PROTECTED;
-#endif
+int v3_init_passthrough_paging(struct v3_vm_info *vm)
+{
+ INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
+ return 0;
+}
- switch(mode) {
- case REAL:
- case PROTECTED:
- return invalidate_addr_32(info, inv_addr);
+int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
+{
+ struct passthrough_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(vm->passthrough_impl.event_callback_list),
+ node) {
+ list_del(&(cb->node));
+ V3_Free(cb);
+ }
+
+ return 0;
+}
- case PROTECTED_PAE:
- return invalidate_addr_32pae(info, inv_addr);
+int v3_init_passthrough_paging_core(struct guest_info *core)
+{
+ // currently nothing to init
+ return 0;
+}
+
+int v3_deinit_passthrough_paging_core(struct guest_info *core)
+{
+ // currently nothing to deinit
+ return 0;
+}
+
+
+// inline nested paging support for Intel and AMD
+#include "svm_npt.h"
+#include "vmx_npt.h"
- case LONG:
- case LONG_32_COMPAT:
- return invalidate_addr_64(info, inv_addr);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
- return -1;
+static inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
+{
+ if (is_vmx_nested()) {
+#ifdef V3_CONFIG_VMX
+ ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
+#endif
+ } else {
+ *out = *(pf_error_t *)pfinfo;
+ }
+}
+
+int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+ pf_error_t err;
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+ convert_to_pf_error(pfinfo,&err);
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
+ dispatch_nested_event(info,&event);
+ }
+
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
+ } else {
+ rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
+ }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
+ dispatch_nested_event(info,&event);
+ }
+
+ return rc;
+}
+
+
+
+int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
+ dispatch_nested_event(info,&event);
+ }
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
+ } else {
+ rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
+ }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
+ dispatch_nested_event(info,&event);
+ }
+ return rc;
}
+
int v3_invalidate_nested_addr_range(struct guest_info * info,
- addr_t inv_addr_start, addr_t inv_addr_end) {
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ int rc;
+
+ addr_t start, end;
+
+ if (!actual_start) { actual_start=&start; }
+ if (!actual_end) { actual_end=&end; }
+
+ if (have_nested_callbacks(info)) {
+ struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
+ dispatch_nested_event(info,&event);
+ }
+
+ if (is_vmx_nested()) {
+ rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+ } else {
+ rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
+ }
+
+
+ if (have_nested_callbacks(info)) {
+    struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
+ dispatch_nested_event(info,&event);
+ }
+
+ return rc;
+
+}
-#ifdef __V3_64BIT__
- v3_cpu_mode_t mode = LONG;
-#else
- v3_cpu_mode_t mode = PROTECTED;
-#endif
- switch(mode) {
- case REAL:
- case PROTECTED:
- return invalidate_addr_32_range(info, inv_addr_start, inv_addr_end);
+int v3_init_nested_paging(struct v3_vm_info *vm)
+{
+ INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
+ return 0;
+}
- case PROTECTED_PAE:
- return invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end);
+int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
+{
+ if (is_vmx_nested()) {
+ return init_ept(core, (struct vmx_hw_info *) hwinfo);
+ } else {
+ // no initialization for SVM
+ return 0;
+ }
+}
+
+int v3_deinit_nested_paging(struct v3_vm_info *vm)
+{
+ struct nested_event_callback *cb,*temp;
+
+ list_for_each_entry_safe(cb,
+ temp,
+ &(vm->nested_impl.event_callback_list),
+ node) {
+ list_del(&(cb->node));
+ V3_Free(cb);
+ }
+
+ return 0;
+}
- case LONG:
- case LONG_32_COMPAT:
- return invalidate_addr_64_range(info, inv_addr_start, inv_addr_end);
-
- default:
- PrintError(info->vm_info, info, "Unknown CPU Mode\n");
- break;
- }
+int v3_deinit_nested_paging_core(struct guest_info *core)
+{
+ // nothing to do.. probably dealloc? FIXME PAD
- return -1;
+ return 0;
}
#include <palacios/vmm_ctrl_regs.h>
+/* This always builds 2 level page tables - no large pages are used */
+
static inline int handle_passthrough_pagefault_32(struct guest_info * info,
addr_t fault_addr,
- pf_error_t error_code) {
+ pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
+
// Check to see if pde and pte exist (create them if not)
pde32_t * pde = NULL;
pte32_t * pte = NULL;
}
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte32_t *)create_generic_pt_page(info);
}
-static inline int invalidate_addr_32(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_32(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
-
- return invalidate_addr_32_internal(core,inv_addr,&start,&len);
+ int rc;
+
+ rc = invalidate_addr_32_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
}
-static inline int invalidate_addr_32_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_32_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_32_internal(core,next,&start, &len);
- if (rc) {
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
+ if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
+/* This always builds 3 level page tables - no large pages */
static inline int handle_passthrough_pagefault_32pae(struct guest_info * info,
addr_t fault_addr,
- pf_error_t error_code) {
+ pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
pdpe32pae_t * pdpe = NULL;
pde32pae_t * pde = NULL;
pte32pae_t * pte = NULL;
}
PrintDebug(info->vm_info, info, "Handling pde error pd base address =%p\n", (void *)pde);
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte32pae_t *)create_generic_pt_page(info);
}
PrintDebug(info->vm_info, info, "Handling pte error pt base address=%p\n", (void *)pte);
+
+
// Fix up the PTE entry
if (pte[pte_index].present == 0) {
pte[pte_index].user_page = 1;
-static inline int invalidate_addr_32pae(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_32pae(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
+ int rc;
- return invalidate_addr_32pae_internal(core,inv_addr,&start,&len);
+ rc = invalidate_addr_32pae_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
+
+
}
-static inline int invalidate_addr_32pae_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_32pae_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_32pae_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
+/* this always builds 4 level page tables, but large pages are allowed */
+
// Reference: AMD Software Developer Manual Vol.2 Ch.5 "Page Translation and Protection"
-static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+ addr_t *actual_start, addr_t *actual_end) {
pml4e64_t * pml = NULL;
pdpe64_t * pdpe = NULL;
pde64_t * pde = NULL;
pde2mb = (pde64_2MB_t *)pde; // all but these two lines are the same for PTE
pde2mb[pde_index].large_page = 1;
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr)+1)-1;
+
if (pde2mb[pde_index].present == 0) {
pde2mb[pde_index].user_page = 1;
// Continue with the 4KiB page heirarchy
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
// Fix up the PDE entry
if (pde[pde_index].present == 0) {
pte = (pte64_t *)create_generic_pt_page(core);
return 0;
}
-static inline int invalidate_addr_64(struct guest_info * core, addr_t inv_addr)
+static inline int invalidate_addr_64(struct guest_info * core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
{
- addr_t start;
uint64_t len;
+ int rc;
- return invalidate_addr_64_internal(core,inv_addr,&start,&len);
+ rc = invalidate_addr_64_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
}
-static inline int invalidate_addr_64_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end)
+static inline int invalidate_addr_64_range(struct guest_info * core, addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
{
addr_t next;
addr_t start;
for (next=inv_addr_start; next<=inv_addr_end; ) {
rc = invalidate_addr_64_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
if (rc) {
return rc;
}
next = start + len;
+ *actual_end = next;
}
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
return 0;
}
v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
if (mem_mode == PHYSICAL_MEM) {
- rc |= v3_invalidate_passthrough_addr_range(info, region->guest_start, region->guest_end-1);
+ rc |= v3_invalidate_passthrough_addr_range(info, region->guest_start, region->guest_end-1,NULL,NULL);
} else {
rc |= v3_invalidate_shadow_pts(info);
}
} else if (info->shdw_pg_mode == NESTED_PAGING) {
- rc |= v3_invalidate_nested_addr_range(info, region->guest_start, region->guest_end-1);
+ rc |= v3_invalidate_nested_addr_range(info, region->guest_start, region->guest_end-1,NULL,NULL);
}
}
v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
if (mem_mode == PHYSICAL_MEM) {
- rc |= v3_invalidate_passthrough_addr_range(info,reg->guest_start, reg->guest_end-1);
+ rc |= v3_invalidate_passthrough_addr_range(info,reg->guest_start, reg->guest_end-1,NULL,NULL);
} else {
rc |= v3_invalidate_shadow_pts(info);
}
} else if (info->shdw_pg_mode == NESTED_PAGING) {
- rc |= v3_invalidate_nested_addr_range(info,reg->guest_start, reg->guest_end-1);
+ rc |= v3_invalidate_nested_addr_range(info,reg->guest_start, reg->guest_end-1,NULL,NULL);
}
}
if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) {
// If paging is not turned on we need to handle the special cases
- rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code);
+ rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code,NULL,NULL);
} else if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
struct v3_shdw_pg_impl * impl = state->current_impl;
- if (v3_init_ept(core, &hw_info) == -1) {
+ if (v3_init_nested_paging_core(core, &hw_info) == -1) {
PrintError(core->vm_info, core, "Error initializing EPT\n");
return -1;
}
((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->cd = 0;
- if (v3_init_ept(core, &hw_info) == -1) {
+ if (v3_init_nested_paging_core(core, &hw_info) == -1) {
PrintError(core->vm_info, core, "Error initializing EPT\n");
return -1;
}
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
- * All rights reserved.
- *
- * Author: Jack Lange <jacklange@cs.pitt.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vmx_ept.h>
-#include <palacios/vmx_lowlevel.h>
-#include <palacios/vmm_paging.h>
-#include <palacios/vm_guest_mem.h>
-
-
-static struct vmx_ept_msr * ept_info = NULL;
-
-
-static addr_t create_ept_page() {
- void * temp;
- void * page = 0;
-
- temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
- if (!temp) {
- PrintError(VM_NONE, VCORE_NONE, "Cannot allocate EPT page\n");
- return 0;
- }
- page = V3_VAddr(temp);
- memset(page, 0, PAGE_SIZE);
-
- return (addr_t)page;
-}
-
-
-
-
-int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info) {
- addr_t ept_pa = (addr_t)V3_PAddr((void *)create_ept_page());
- vmx_eptp_t * ept_ptr = (vmx_eptp_t *)&(core->direct_map_pt);
-
-
- ept_info = &(hw_info->ept_info);
-
- /* TODO: Should we set this to WB?? */
- ept_ptr->psmt = 0;
-
- if (ept_info->pg_walk_len4) {
- ept_ptr->pwl1 = 3;
- } else {
- PrintError(core->vm_info, core, "Unsupported EPT Table depth\n");
- return -1;
- }
-
- ept_ptr->pml_base_addr = PAGE_BASE_ADDR(ept_pa);
-
-
- return 0;
-}
-
-
-/* We can use the default paging macros, since the formats are close enough to allow it */
-
-int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual) {
- ept_pml4_t * pml = NULL;
- // ept_pdp_1GB_t * pdpe1gb = NULL;
- ept_pdp_t * pdpe = NULL;
- ept_pde_2MB_t * pde2mb = NULL;
- ept_pde_t * pde = NULL;
- ept_pte_t * pte = NULL;
- addr_t host_addr = 0;
-
- int pml_index = PML4E64_INDEX(fault_addr);
- int pdpe_index = PDPE64_INDEX(fault_addr);
- int pde_index = PDE64_INDEX(fault_addr);
- int pte_index = PTE64_INDEX(fault_addr);
-
- struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->vcpu_id, fault_addr);
- int page_size = PAGE_SIZE_4KB;
-
-
-
- pf_error_t error_code = {0};
- error_code.present = ept_qual->present;
- error_code.write = ept_qual->write;
-
- if (region == NULL) {
- PrintError(core->vm_info, core, "invalid region, addr=%p\n", (void *)fault_addr);
- return -1;
- }
-
- if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
- page_size = v3_get_max_page_size(core, fault_addr, LONG);
- }
-
-
-
- pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
-
-
-
- //Fix up the PML entry
- if (pml[pml_index].read == 0) {
- pdpe = (ept_pdp_t *)create_ept_page();
-
- // Set default PML Flags...
- pml[pml_index].read = 1;
- pml[pml_index].write = 1;
- pml[pml_index].exec = 1;
-
- pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
- } else {
- pdpe = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
- }
-
-
- // Fix up the PDPE entry
- if (pdpe[pdpe_index].read == 0) {
- pde = (ept_pde_t *)create_ept_page();
-
- // Set default PDPE Flags...
- pdpe[pdpe_index].read = 1;
- pdpe[pdpe_index].write = 1;
- pdpe[pdpe_index].exec = 1;
-
- pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
- } else {
- pde = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
- }
-
-
-
- // Fix up the 2MiB PDE and exit here
- if (page_size == PAGE_SIZE_2MB) {
- pde2mb = (ept_pde_2MB_t *)pde; // all but these two lines are the same for PTE
- pde2mb[pde_index].large_page = 1;
-
- if (pde2mb[pde_index].read == 0) {
-
- if ( (region->flags.alloced == 1) &&
- (region->flags.read == 1)) {
- // Full access
- pde2mb[pde_index].read = 1;
- pde2mb[pde_index].exec = 1;
- pde2mb[pde_index].ipat = 1;
- pde2mb[pde_index].mt = 6;
-
- if (region->flags.write == 1) {
- pde2mb[pde_index].write = 1;
- } else {
- pde2mb[pde_index].write = 0;
- }
-
- if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
- PrintError(core->vm_info, core, "Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
- return -1;
- }
-
- pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
- } else {
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
- } else {
- // We fix all permissions on the first pass,
- // so we only get here if its an unhandled exception
-
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
-
- return 0;
- }
-
- // Continue with the 4KiB page heirarchy
-
-
- // Fix up the PDE entry
- if (pde[pde_index].read == 0) {
- pte = (ept_pte_t *)create_ept_page();
-
- pde[pde_index].read = 1;
- pde[pde_index].write = 1;
- pde[pde_index].exec = 1;
-
- pde[pde_index].pt_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pte));
- } else {
- pte = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
- }
-
-
-
-
- // Fix up the PTE entry
- if (pte[pte_index].read == 0) {
-
- if ( (region->flags.alloced == 1) &&
- (region->flags.read == 1)) {
- // Full access
- pte[pte_index].read = 1;
- pte[pte_index].exec = 1;
- pte[pte_index].ipat = 1;
- pte[pte_index].mt = 6;
-
- if (region->flags.write == 1) {
- pte[pte_index].write = 1;
- } else {
- pte[pte_index].write = 0;
- }
-
- if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
- PrintError(core->vm_info, core, "Error Could not translate fault addr (%p)\n", (void *)fault_addr);
- return -1;
- }
-
-
- pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
- } else {
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
- } else {
- // We fix all permissions on the first pass,
- // so we only get here if its an unhandled exception
-
- return region->unhandled(core, fault_addr, fault_addr, region, error_code);
- }
-
-
- return 0;
-}
case VMX_EXIT_EPT_VIOLATION: {
struct ept_exit_qual * ept_qual = (struct ept_exit_qual *)&(exit_info->exit_qual);
- if (v3_handle_ept_fault(info, exit_info->ept_fault_addr, ept_qual) == -1) {
+ if (v3_handle_nested_pagefault(info, exit_info->ept_fault_addr, ept_qual,NULL,NULL) == -1) {
PrintError(info->vm_info, info, "Error handling EPT fault\n");
return -1;
}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu> (implementation)
+ * Peter Dinda <pdinda@northwestern.edu> (invalidation)
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmx_ept.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vm_guest_mem.h>
+
+
+/*
+
+   Note that the Intel nested page tables have a slightly different format
+ than regular page tables. Also note that our implementation
+ uses only 64 bit (4 level) page tables. This is unlike the SVM
+ nested paging implementation.
+
+
+*/
+
+#ifndef V3_CONFIG_VMX
+
+
+static int handle_vmx_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo,
+                                       addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do nested page fault as VMX is not enabled.\n");
+ return -1;
+}
+static int handle_vmx_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
+                                             addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr as VMX is not enabled.\n");
+ return -1;
+}
+static int handle_vmx_invalidate_nested_addr_range(struct guest_info * info,
+                                                   addr_t inv_addr_start, addr_t inv_addr_end,
+                                                   addr_t *actual_start, addr_t *actual_end)
+{
+ PrintError(info->vm_info, info, "Cannot do invalidate nested addr range as VMX is not enabled.\n");
+ return -1;
+}
+
+#else
+
+static struct vmx_ept_msr * ept_info = NULL;
+
+
+static addr_t create_ept_page() {
+ void * temp;
+ void * page = 0;
+
+ temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
+ if (!temp) {
+ PrintError(VM_NONE, VCORE_NONE, "Cannot allocate EPT page\n");
+ return 0;
+ }
+ page = V3_VAddr(temp);
+ memset(page, 0, PAGE_SIZE);
+
+ return (addr_t)page;
+}
+
+
+
+
+static int init_ept(struct guest_info * core, struct vmx_hw_info * hw_info) {
+ addr_t ept_pa = (addr_t)V3_PAddr((void *)create_ept_page());
+ vmx_eptp_t * ept_ptr = (vmx_eptp_t *)&(core->direct_map_pt);
+
+
+ ept_info = &(hw_info->ept_info);
+
+ /* TODO: Should we set this to WB?? */
+ ept_ptr->psmt = 0;
+
+ if (ept_info->pg_walk_len4) {
+ ept_ptr->pwl1 = 3;
+ } else {
+ PrintError(core->vm_info, core, "Unsupported EPT Table depth\n");
+ return -1;
+ }
+
+ ept_ptr->pml_base_addr = PAGE_BASE_ADDR(ept_pa);
+
+
+ return 0;
+}
+
+
+static inline void ept_exit_qual_to_pf_error(struct ept_exit_qual *qual, pf_error_t *error)
+{
+ memset(error,0,sizeof(pf_error_t));
+ error->present = qual->present;
+ error->write = qual->write;
+ error->ifetch = qual->ifetch;
+}
+
+
+/* We can use the default paging macros, since the formats are close enough to allow it */
+
+
+static int handle_vmx_nested_pagefault(struct guest_info * core, addr_t fault_addr, void *pfinfo,
+ addr_t *actual_start, addr_t *actual_end )
+{
+ struct ept_exit_qual * ept_qual = (struct ept_exit_qual *) pfinfo;
+ ept_pml4_t * pml = NULL;
+ // ept_pdp_1GB_t * pdpe1gb = NULL;
+ ept_pdp_t * pdpe = NULL;
+ ept_pde_2MB_t * pde2mb = NULL;
+ ept_pde_t * pde = NULL;
+ ept_pte_t * pte = NULL;
+ addr_t host_addr = 0;
+
+ int pml_index = PML4E64_INDEX(fault_addr);
+ int pdpe_index = PDPE64_INDEX(fault_addr);
+ int pde_index = PDE64_INDEX(fault_addr);
+ int pte_index = PTE64_INDEX(fault_addr);
+
+ struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->vcpu_id, fault_addr);
+ int page_size = PAGE_SIZE_4KB;
+
+
+ pf_error_t error_code;
+
+ ept_exit_qual_to_pf_error(ept_qual, &error_code);
+
+  PrintDebug(core->vm_info, core, "Nested PageFault: fault_addr=%p, error_code=%u, exit_qual=0x%llx\n", (void *)fault_addr, *(uint_t *)&error_code, ept_qual->value);
+
+
+ if (region == NULL) {
+ PrintError(core->vm_info, core, "invalid region, addr=%p\n", (void *)fault_addr);
+ return -1;
+ }
+
+ if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
+ page_size = v3_get_max_page_size(core, fault_addr, LONG);
+ }
+
+
+
+ pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
+
+
+
+ //Fix up the PML entry
+ if (pml[pml_index].read == 0) {
+ pdpe = (ept_pdp_t *)create_ept_page();
+
+ // Set default PML Flags...
+ pml[pml_index].read = 1;
+ pml[pml_index].write = 1;
+ pml[pml_index].exec = 1;
+
+ pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
+ } else {
+ pdpe = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
+ }
+
+
+ // Fix up the PDPE entry
+ if (pdpe[pdpe_index].read == 0) {
+ pde = (ept_pde_t *)create_ept_page();
+
+ // Set default PDPE Flags...
+ pdpe[pdpe_index].read = 1;
+ pdpe[pdpe_index].write = 1;
+ pdpe[pdpe_index].exec = 1;
+
+ pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
+ } else {
+ pde = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
+ }
+
+
+
+ // Fix up the 2MiB PDE and exit here
+ if (page_size == PAGE_SIZE_2MB) {
+ pde2mb = (ept_pde_2MB_t *)pde; // all but these two lines are the same for PTE
+ pde2mb[pde_index].large_page = 1;
+
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(fault_addr)+1)-1;
+
+ if (pde2mb[pde_index].read == 0) {
+
+ if ( (region->flags.alloced == 1) &&
+ (region->flags.read == 1)) {
+ // Full access
+ pde2mb[pde_index].read = 1;
+ pde2mb[pde_index].exec = 1;
+ pde2mb[pde_index].ipat = 1;
+ pde2mb[pde_index].mt = 6;
+
+ if (region->flags.write == 1) {
+ pde2mb[pde_index].write = 1;
+ } else {
+ pde2mb[pde_index].write = 0;
+ }
+
+ if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+ PrintError(core->vm_info, core, "Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
+ return -1;
+ }
+
+ pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
+ } else {
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+ } else {
+ // We fix all permissions on the first pass,
+ // so we only get here if its an unhandled exception
+
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+
+ return 0;
+ }
+
+    // Continue with the 4KiB page hierarchy
+
+
+ // Fix up the PDE entry
+ if (pde[pde_index].read == 0) {
+ pte = (ept_pte_t *)create_ept_page();
+
+ pde[pde_index].read = 1;
+ pde[pde_index].write = 1;
+ pde[pde_index].exec = 1;
+
+ pde[pde_index].pt_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pte));
+ } else {
+ pte = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
+ }
+
+
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr));
+ *actual_end = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(fault_addr)+1)-1;
+
+
+ // Fix up the PTE entry
+ if (pte[pte_index].read == 0) {
+
+ if ( (region->flags.alloced == 1) &&
+ (region->flags.read == 1)) {
+ // Full access
+ pte[pte_index].read = 1;
+ pte[pte_index].exec = 1;
+ pte[pte_index].ipat = 1;
+ pte[pte_index].mt = 6;
+
+ if (region->flags.write == 1) {
+ pte[pte_index].write = 1;
+ } else {
+ pte[pte_index].write = 0;
+ }
+
+ if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+ PrintError(core->vm_info, core, "Error Could not translate fault addr (%p)\n", (void *)fault_addr);
+ return -1;
+ }
+
+
+ pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
+ } else {
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+ } else {
+ // We fix all permissions on the first pass,
+ // so we only get here if its an unhandled exception
+
+ return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+ }
+
+
+ return 0;
+}
+
+
+static int handle_vmx_invalidate_nested_addr_internal(struct guest_info *core, addr_t inv_addr,
+ addr_t *actual_start, uint64_t *actual_size) {
+ ept_pml4_t *pml = NULL;
+ ept_pdp_t *pdpe = NULL;
+ ept_pde_t *pde = NULL;
+ ept_pte_t *pte = NULL;
+
+
+
+ // clear the page table entry
+
+ int pml_index = PML4E64_INDEX(inv_addr);
+ int pdpe_index = PDPE64_INDEX(inv_addr);
+ int pde_index = PDE64_INDEX(inv_addr);
+ int pte_index = PTE64_INDEX(inv_addr);
+
+
+ pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
+
+
+ // note that there are no present bits in EPT, so we
+ // use the read bit to signify this.
+ // either an entry is read/write/exec or it is none of these
+
+ if (pml[pml_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_512GB(PAGE_BASE_ADDR_512GB(inv_addr));
+ *actual_size = PAGE_SIZE_512GB;
+ return 0;
+ }
+
+ pdpe = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pml[pml_index].pdp_base_addr));
+
+ if (pdpe[pdpe_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_1GB(PAGE_BASE_ADDR_1GB(inv_addr));
+ *actual_size = PAGE_SIZE_1GB;
+ return 0;
+ } else if (pdpe[pdpe_index].large_page == 1) { // 1GiB
+ pdpe[pdpe_index].read = 0;
+ pdpe[pdpe_index].write = 0;
+ pdpe[pdpe_index].exec = 0;
+ *actual_start = BASE_TO_PAGE_ADDR_1GB(PAGE_BASE_ADDR_1GB(inv_addr));
+ *actual_size = PAGE_SIZE_1GB;
+ return 0;
+ }
+
+ pde = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pdpe[pdpe_index].pd_base_addr));
+
+ if (pde[pde_index].read == 0) {
+ // already invalidated
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(inv_addr));
+ *actual_size = PAGE_SIZE_2MB;
+ return 0;
+ } else if (pde[pde_index].large_page == 1) { // 2MiB
+ pde[pde_index].read = 0;
+ pde[pde_index].write = 0;
+ pde[pde_index].exec = 0;
+ *actual_start = BASE_TO_PAGE_ADDR_2MB(PAGE_BASE_ADDR_2MB(inv_addr));
+ *actual_size = PAGE_SIZE_2MB;
+ return 0;
+ }
+
+ pte = V3_VAddr((void*)BASE_TO_PAGE_ADDR(pde[pde_index].pt_base_addr));
+
+ pte[pte_index].read = 0; // 4KiB
+ pte[pte_index].write = 0;
+ pte[pte_index].exec = 0;
+
+ *actual_start = BASE_TO_PAGE_ADDR_4KB(PAGE_BASE_ADDR_4KB(inv_addr));
+ *actual_size = PAGE_SIZE_4KB;
+
+ return 0;
+}
+
+
+static int handle_vmx_invalidate_nested_addr(struct guest_info *core, addr_t inv_addr,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ uint64_t len;
+ int rc;
+
+ rc = handle_vmx_invalidate_nested_addr_internal(core,inv_addr,actual_start,&len);
+
+ *actual_end = *actual_start + len - 1;
+
+ return rc;
+}
+
+
+static int handle_vmx_invalidate_nested_addr_range(struct guest_info *core,
+ addr_t inv_addr_start, addr_t inv_addr_end,
+ addr_t *actual_start, addr_t *actual_end)
+{
+ addr_t next;
+ addr_t start;
+ uint64_t len;
+ int rc;
+
+ for (next=inv_addr_start; next<=inv_addr_end; ) {
+ rc = handle_vmx_invalidate_nested_addr_internal(core,next,&start, &len);
+ if (next==inv_addr_start) {
+ // first iteration, capture where we start invalidating
+ *actual_start = start;
+ }
+ if (rc) {
+ return rc;
+ }
+ next = start + len;
+ *actual_end = next;
+ }
+ // last iteration, actual_end is off by one
+ (*actual_end)--;
+ return 0;
+}
+
+#endif