From: Peter Dinda
Date: Fri, 25 Oct 2013 23:30:39 +0000 (-0500)
Subject: Floating point context-switching and checkpoint/load
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=9feccf93cd8327d1d30a404a92f19716bf5a1e96

Floating point context-switching and checkpoint/load

This integrates:
- the option to do floating point context-switching
- conservative code to do context-switching in Palacios
- a lazy floating point save/restore host interface
- an implementation of this interface in the linux module
- liberal code to use this interface in Palacios
- floating point checkpointing

This also includes a performance tuning element that is hard to separate.
---

diff --git a/Kconfig b/Kconfig
index 9c9dfe2..028645f 100644
--- a/Kconfig
+++ b/Kconfig
@@ -75,6 +75,7 @@ config VMX
 	Compile with support for Intel VMX
 
+
 config FRAME_POINTER
 	bool "Compile with Frame pointers"
 	default n
@@ -144,6 +145,49 @@ config MAX_CPUS
 endmenu
 
 source "palacios/src/interfaces/Kconfig"
+
+menu "Virtual core specialization"
+
+config CUSTOM_CPUID
+	bool "Use custom CPU information (vendor, etc)"
+	default y
+	help
+	  If set, the CPU information will be for a special V3VEE vendor.
+	  This should result in identical guest kernel setup, regardless
+	  of the underlying hardware, but it also means that the guest kernel
+	  has no chance of employing CPU-specific bug fixes.
+
+config STRICT_MSR_SEMANTICS
+	bool "Use strict RDMSR/WRMSR semantics"
+	default y
+	help
+	  Use strict MSR semantics - when an unhandled MSR is read or written,
+	  a GPF is generated.  This is typically used with CUSTOM_CPUID on.
+
+config FP_SWITCH
+	bool "Floating point context switching"
+	default y
+	help
+	  If set, floating point is handled for context switches
+	  (VM1->VM2->VM1 and/or VM->HOST->VM).  This can be disabled
+	  for environments where a single VM is the only user of FP.
+	  Note that even if disabled, FP save/restore code is included
+	  for support of checkpoint/restore.
+
+config LAZY_FP_SWITCH
+	bool "Use host-based lazy floating point context switching"
+	depends on FP_SWITCH && HOST_LAZY_FPU_SWITCH
+	default y
+	help
+	  When true, the host's lazy floating point save/restore
+	  mechanism is notified on each exit and entry.  If false,
+	  the floating point state is explicitly saved on each exit
+	  and restored on each entry---this save/restore is entirely
+	  done in Palacios.
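+# (Illustration, not part of the build: with FP_SWITCH=y but LAZY_FP_SWITCH=n,
+# Palacios itself does a full FXSAVE on every VM exit and a full FXRSTOR on
+# every VM entry; with LAZY_FP_SWITCH=y it instead notifies the host through
+# the used_fpu()/need_fpu() hooks and lets the host defer the actual save
+# until some other thread touches the FPU.)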
+
+
+endmenu
+
 source "palacios/src/extensions/Kconfig"
 
 config TELEMETRY
diff --git a/linux_module/palacios-stubs.c b/linux_module/palacios-stubs.c
index 08021e4..decae1f 100644
--- a/linux_module/palacios-stubs.c
+++ b/linux_module/palacios-stubs.c
@@ -16,8 +16,15 @@
 #include
 #include
+#include
+
 #include
 #include
+
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+#include
+#endif
+
 #include "palacios.h"
 #include "mm.h"
@@ -25,6 +32,8 @@
 #include "memcheck.h"
 #include "lockcheck.h"
 
+
+
 // The following can be used to track heap bugs
 // zero memory after allocation
 #define ALLOC_ZERO_MEM 0
@@ -169,14 +178,14 @@ void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id
     void * pg_addr = NULL;
 
     if (num_pages<=0) {
-	ERROR("ALERT ALERT Attempt to allocate zero or fewer pages\n");
+	ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, constraints 0x%x)\n", num_pages, alignment, node_id, constraints);
 	return NULL;
     }
 
     pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment, node_id, constraints);
 
     if (!pg_addr) {
-	ERROR("ALERT ALERT Page allocation has FAILED Warning\n");
+	ERROR("ALERT ALERT Page allocation has FAILED Warning (%d pages, alignment %d, node %d, constraints 0x%x)\n", num_pages, alignment, node_id, constraints);
 	return NULL;
     }
@@ -195,6 +204,11 @@ void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id
  */
 void palacios_free_pages(void * page_paddr, int num_pages) {
+    if (!page_paddr) {
+	ERROR("Ignoring free pages: 0x%p (0x%lx) for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages);
+	dump_stack();
+	return;
+    }
     pg_frees += num_pages;
     free_palacios_pgs((uintptr_t)page_paddr, num_pages);
     MEMCHECK_FREE_PAGES(page_paddr, num_pages*4096);
@@ -294,6 +307,11 @@ palacios_free(
 	void * addr
 )
 {
+    if (!addr) {
+	ERROR("Ignoring free: 0x%p\n", addr);
+	dump_stack();
+	return;
+    }
     frees++;
     kfree(addr-ALLOC_PAD);
     MEMCHECK_KFREE(addr-ALLOC_PAD);
@@ -359,17 +376,25 @@ static int lnx_thread_target(void * arg) {
     allow_signal(SIGKILL);
     */
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+    // We are a kernel thread that needs FPU save/restore state
+    // vcores definitely need this, all the other threads get it too,
+    // but they just won't use it
+    fpu_alloc(&(current->thread.fpu));
+#endif
+
     ret = thread_info->fn(thread_info->arg);
 
-    INFO("Palacios Thread (%s) EXITING\n", thread_info->name);
 
     palacios_free(thread_info);
     // handle cleanup
 
+    // We rely on do_exit to free the fpu data
+    // since we could get switched at any point until the thread is done...
+
     do_exit(ret);
 
     return 0; // should not get here.
 }
@@ -764,6 +789,33 @@ palacios_mutex_unlock_irqrestore(void *mutex, void *flags)
     LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags);
 }
 
+void palacios_used_fpu(void)
+{
+    struct thread_info *cur = current_thread_info();
+
+    // We assume we are not preemptible here...
+    cur->status |= TS_USEDFPU;
+    clts();
+    // After this, FP save should be handled by Linux if it
+    // switches to a different task and that task uses the FPU
+}
+
+inline int ists(void)
+{
+    return read_cr0() & X86_CR0_TS;
+}
+
+void palacios_need_fpu(void)
+{
+    // We assume we are not preemptible here...
+    if (ists()) {
+	// we have been switched back to from somewhere else...
+	// Do a restore now - this will also do a clts()
+	math_state_restore();
+    }
+}
+
+
 /**
  * Structure used by the Palacios hypervisor to interface with the host kernel.
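 * (Note: the lazy-FPU hooks above, palacios_used_fpu() and palacios_need_fpu(),
 * are deliberately not part of this structure; they are registered separately
 * through V3_Init_Lazy_FPU() below, since they are intertwined with thread
 * creation.)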
 */
@@ -796,6 +848,15 @@ static struct v3_os_hooks palacios_os_hooks = {
 
 };
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+// Note that this host interface is defined here since it's
+// intertwined with thread creation...
+static struct v3_lazy_fpu_iface palacios_fpu_hooks = {
+    .used_fpu = palacios_used_fpu,
+    .need_fpu = palacios_need_fpu
+};
+
+#endif
 
 int palacios_vmm_init( char *options )
@@ -842,6 +903,10 @@ int palacios_vmm_init( char *options )
 
     Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options);
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+    V3_Init_Lazy_FPU(&palacios_fpu_hooks);
+#endif
+
     return 0;
 }
diff --git a/linux_module/palacios.h b/linux_module/palacios.h
index c9cbb96..b4c17e6 100644
--- a/linux_module/palacios.h
+++ b/linux_module/palacios.h
@@ -166,6 +166,8 @@ void palacios_yield_cpu(void);
 void palacios_sleep_cpu(unsigned int us);
 unsigned int palacios_get_cpu(void);
 unsigned int palacios_get_cpu_khz(void);
+void palacios_used_fpu(void);
+void palacios_need_fpu(void);
 void *palacios_mutex_alloc(void);        // allocates and inits a lock
 void palacios_mutex_init(void *mutex);   // only inits a lock
 void palacios_mutex_deinit(void *mutex); // only deinits a lock
diff --git a/palacios/include/interfaces/vmm_lazy_fpu.h b/palacios/include/interfaces/vmm_lazy_fpu.h
new file mode 100644
index 0000000..a50dc14
--- /dev/null
+++ b/palacios/include/interfaces/vmm_lazy_fpu.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Peter Dinda
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_LAZY_FPU
+#define __VMM_LAZY_FPU
+
+#include
+
+
+struct v3_lazy_fpu_iface {
+
+    // if these two are provided, then lazy FP save/restore is handled by the host
+
+    // indicate that the calling thread has used floating point
+    void (*used_fpu)(void);
+
+    // indicate that the calling thread wants to use floating point again
+    void (*need_fpu)(void);
+
+};
+
+
+/*
+ * function prototypes
+ */
+
+extern void V3_Init_Lazy_FPU(struct v3_lazy_fpu_iface * palacios_lazy_fpu);
+
+#ifdef __V3VEE__
+
+#define V3_LAZY_FPU_USED()						\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->used_fpu) { \
+	    (palacios_lazy_fpu_hooks)->used_fpu();			\
+	}								\
+    } while (0)
+
+#define V3_LAZY_FPU_NEED()						\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->need_fpu) { \
+	    (palacios_lazy_fpu_hooks)->need_fpu();			\
+	}								\
+    } while (0)
+
+#endif
+
+#endif
diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h
index 5272b0d..4a9d075 100644
--- a/palacios/include/palacios/vm_guest.h
+++ b/palacios/include/palacios/vm_guest.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -43,7 +44,7 @@
 #include
 #include
 #include
-
+#include
 #include
 
 #ifdef V3_CONFIG_TELEMETRY
@@ -86,6 +87,7 @@ struct guest_info {
     v3_paging_mode_t shdw_pg_mode;
     struct v3_shdw_pg_state shdw_pg_state;
+    //struct v3_nested_pg_state nested_pg_state;
     addr_t direct_map_pt;
 
@@ -116,6 +118,7 @@ struct guest_info {
     struct v3_segments segments;
     struct v3_msrs msrs;
 
+    struct v3_fp_state fp_state;
 
     void * vmm_data;
@@ -177,6 +180,7 @@ struct v3_vm_info {
     struct v3_mem_hooks mem_hooks;
 
     struct v3_shdw_impl_state shdw_impl;
+    //struct v3_nested_impl_state nested_impl;
 
     void * sched_priv_data;
     struct v3_io_map io_map;
diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h
index 983cd78..2b02058 100644
--- a/palacios/include/palacios/vmm.h
+++ b/palacios/include/palacios/vmm.h
@@ -369,8 +369,6 @@ struct v3_os_hooks {
     unsigned int (*get_cpu)(void);
 
-
-
     void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name);
     void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
     void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
diff --git a/palacios/include/palacios/vmm_fp.h b/palacios/include/palacios/vmm_fp.h
new file mode 100644
index 0000000..76b377d
--- /dev/null
+++ b/palacios/include/palacios/vmm_fp.h
@@ -0,0 +1,224 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, Peter Dinda
+ * Copyright (c) 2013, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Peter Dinda
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_FP_H
+#define __VMM_FP_H
+
+#include
+#include
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+#include
+#endif
+
+// the FPRs are arranged into the
+// precise layout of the FXSAVE/FXRSTOR instructions
+// bytes 32+, which is common for all three variants
+// 8*6 reserved + 8*10 (fpu/mmx) + 16*16 (xmm)
+// + 3*16 (res) + 3*16 (ava) = 480 bytes
+// another 32 bytes are used for the store header,
+// which varies depending on machine mode
+struct v3_fp_regs {
+    v3_fp_mmx_reg_t stmm0;  // stmm0..7 are the x87 stack or mmx regs
+    uint8_t         res0[6];
+    v3_fp_mmx_reg_t stmm1;
+    uint8_t         res1[6];
+    v3_fp_mmx_reg_t stmm2;
+    uint8_t         res2[6];
+    v3_fp_mmx_reg_t stmm3;
+    uint8_t         res3[6];
+    v3_fp_mmx_reg_t stmm4;
+    uint8_t         res4[6];
+    v3_fp_mmx_reg_t stmm5;
+    uint8_t         res5[6];
+    v3_fp_mmx_reg_t stmm6;
+    uint8_t         res6[6];
+    v3_fp_mmx_reg_t stmm7;
+    uint8_t         res7[6];
+    v3_xmm_reg_t    xmm0;   // xmm0..7 are the "classic" SSE regs
+    v3_xmm_reg_t    xmm1;
+    v3_xmm_reg_t    xmm2;
+    v3_xmm_reg_t    xmm3;
+    v3_xmm_reg_t    xmm4;
+    v3_xmm_reg_t    xmm5;
+    v3_xmm_reg_t    xmm6;
+    v3_xmm_reg_t    xmm7;
+    v3_xmm_reg_t    xmm8;   // xmm8..15 are the "new" SSE regs
+    v3_xmm_reg_t    xmm9;
+    v3_xmm_reg_t    xmm10;
+    v3_xmm_reg_t    xmm11;
+    v3_xmm_reg_t    xmm12;
+    v3_xmm_reg_t    xmm13;
+    v3_xmm_reg_t    xmm14;
+    v3_xmm_reg_t    xmm15;
+    v3_xmm_reg_t    res16;  // reserved
+    v3_xmm_reg_t    res17;
+    v3_xmm_reg_t    res18;
+    v3_xmm_reg_t    ava19;
+    v3_xmm_reg_t    ava20;
+    v3_xmm_reg_t    ava21;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+// FXSAVE, 32 bit mode header (32 bytes)
+// V3_FP_MODE_32
+struct v3_fp_32_state {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t  ftw;
+    uint8_t  res0;
+    uint16_t fop;
+    uint32_t fip;        // fpu instruction pointer
+    uint16_t fcs;        // fpu code segment selector
+    uint16_t res1;
+    uint32_t fdp;        // fpu data pointer
+    uint16_t fds;        // fpu data segment selector
+    uint16_t res2;
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+// FXSAVE, 64 bit mode header, REX.W=1 (32 bytes)
+// V3_FP_MODE_64
+struct v3_fp_64_state {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t  ftw;
+    uint8_t  res0;
+    uint16_t fop;
+    uint64_t fip;        // fpu instruction pointer
+    uint64_t fdp;        // fpu data pointer
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+
+// FXSAVE, 64 bit mode header, REX.W=0 (32 bytes)
+// V3_FP_MODE_64_COMPAT
+struct v3_fp_64compat_state {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t  ftw;
+    uint8_t  res0;
+    uint16_t fop;
+    uint32_t fip;        // fpu instruction pointer
+    uint16_t fcs;        // fpu code segment selector
+    uint16_t res1;
+    uint32_t fdp;        // fpu data pointer
+    uint16_t fds;        // fpu data segment selector
+    uint16_t res2;
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+
+//
+// This is an FXSAVE block
+//
+struct v3_fp_state_core {
+    union {
+	struct v3_fp_32_state       fp32;
+	struct v3_fp_64_state       fp64;
+	struct v3_fp_64compat_state fp64compat;
+    } header;
+    struct v3_fp_regs fprs;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+struct v3_fp_state {
+    // Do we need to restore on next entry?
+    int need_restore;
+    // The mode in which the saved state should be interpreted
+    enum {V3_FP_MODE_32=0, V3_FP_MODE_64, V3_FP_MODE_64_COMPAT} state_type;
+    struct v3_fp_state_core state __attribute__((aligned(16)));
+};
+
+
+struct guest_info;
+
+// Can we save FP state on this core?
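+// (The check below CPUIDs once for SSE support (CPUID.01h:EDX bit 25, which
+// implies FXSAVE/FXRSTOR support) and caches the answer; see
+// v3_can_handle_fp_state() in palacios/src/palacios/vmm_fp.c.)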
+int v3_can_handle_fp_state();
+
+// Save state from this core to the structure
+int v3_get_fp_state(struct guest_info *core);
+
+// Restore FP state from this structure to this core
+int v3_put_fp_state(struct guest_info *core);
+
+int v3_init_fp(void);
+int v3_deinit_fp(void);
+
+#ifndef V3_CONFIG_FP_SWITCH
+
+#define V3_FP_EXIT_SAVE(core)
+#define V3_FP_ENTRY_RESTORE(core)
+
+#else
+
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+
+/* Ideally these would use the TS trick to do lazy calls to used_fpu() */
+#define V3_FP_EXIT_SAVE(core)						\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->used_fpu) { \
+	    (palacios_lazy_fpu_hooks)->used_fpu();			\
+	} else {							\
+	    v3_get_fp_state(core);					\
+	}								\
+    } while (0)
+
+#define V3_FP_ENTRY_RESTORE(core)					\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((core)->fp_state.need_restore) {				\
+	    v3_put_fp_state(core);					\
+	    (core)->fp_state.need_restore=0;				\
+	} else {							\
+	    if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->need_fpu) { \
+		(palacios_lazy_fpu_hooks)->need_fpu();			\
+	    } else {							\
+		v3_put_fp_state(core);					\
+	    }								\
+	}								\
+    } while (0)
+
+#else
+
+// conservative FPU switching
+
+#define V3_FP_EXIT_SAVE(core) v3_get_fp_state(core)
+#define V3_FP_ENTRY_RESTORE(core) v3_put_fp_state(core)
+
+#endif
+
+#endif
+
+#ifdef V3_CONFIG_CHECKPOINT
+
+struct v3_chkpt_ctx;
+
+// save state from structure to checkpoint/migration context
+int v3_save_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core);
+
+// load state from checkpoint/migration context to structure
+int v3_load_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core);
+
+#endif
+
+#endif
diff --git a/palacios/include/palacios/vmm_perftune.h b/palacios/include/palacios/vmm_perftune.h
index 4346374..7876efa 100644
--- a/palacios/include/palacios/vmm_perftune.h
+++ b/palacios/include/palacios/vmm_perftune.h
@@ -24,6 +24,7 @@
 
 #include
 
+#include
 
 struct v3_yield_strategy {
     enum {
@@ -58,6 +59,24 @@ void v3_strategy_driven_yield(struct guest_info *core, uint64_t time_since_l
 
 uint64_t v3_cycle_diff_in_usec(struct guest_info *core, uint64_t earlier_cycles, uint64_t later_cycles);
 
+// The following three macros are intended to make it easy to use
+// strategy-driven yield.  Call the first one when you run out of work,
+// then call the second each time you want to yield because you are
+// still out of work, and then call the third one when you have work
+// to do again.
+//
+// This assumes the thread is locked to a core and may behave strangely if
+// this is not the case.
+
+#define V3_NO_WORK(core) {						\
+    uint64_t _v3_strat_local_first=0, _v3_strat_local_cur=0;		\
+    _v3_strat_local_first=v3_get_host_time(core ? &(core->time_state) : 0);
+
+#define V3_STILL_NO_WORK(core)						\
+    _v3_strat_local_cur=v3_get_host_time(core ? &(core->time_state) : 0); \
+    v3_strategy_driven_yield(core,v3_cycle_diff_in_usec(core,_v3_strat_local_first,_v3_strat_local_cur));
+
+#define V3_HAVE_WORK_AGAIN(core) }
 
 #endif
diff --git a/palacios/include/palacios/vmm_types.h b/palacios/include/palacios/vmm_types.h
index 82e83c6..01e22b7 100644
--- a/palacios/include/palacios/vmm_types.h
+++ b/palacios/include/palacios/vmm_types.h
@@ -76,6 +76,10 @@ typedef char sint8_t;
 typedef ulong_t addr_t;
 typedef ullong_t v3_reg_t;
 
+
+typedef uint8_t v3_xmm_reg_t[16];
+typedef uint8_t v3_fp_mmx_reg_t[10];
+
 #endif /* !
__V3VEE__ */
 
 #endif
diff --git a/palacios/src/interfaces/Kconfig b/palacios/src/interfaces/Kconfig
index a72ce87..581852a 100644
--- a/palacios/src/interfaces/Kconfig
+++ b/palacios/src/interfaces/Kconfig
@@ -88,4 +88,11 @@ config HOST_PWRSTAT
 	help
 	  Select this if you would like to access energy/power
 	  measurements within Palacios
+
+config HOST_LAZY_FPU_SWITCH
+	bool "Host provides lazy FPU context switching"
+	default n
+	help
+	  Select this if your host provides lazy context switch support
+	  for floating point state and you would like Palacios to use it
 endmenu
diff --git a/palacios/src/interfaces/Makefile b/palacios/src/interfaces/Makefile
index ae10d74..262b6cc 100644
--- a/palacios/src/interfaces/Makefile
+++ b/palacios/src/interfaces/Makefile
@@ -10,6 +10,7 @@ obj-$(V3_CONFIG_HOST_HYPERCALL) += vmm_host_hypercall.o
 obj-$(V3_CONFIG_HOST_PCI) += host_pci.o
 obj-$(V3_CONFIG_HOST_PMU) += vmm_pmu.o
 obj-$(V3_CONFIG_HOST_PWRSTAT) += vmm_pwrstat.o
+obj-$(V3_CONFIG_HOST_LAZY_FPU_SWITCH) += vmm_lazy_fpu.o
 obj-y += null.o
 obj-y += vmm_numa.o
diff --git a/palacios/src/interfaces/vmm_lazy_fpu.c b/palacios/src/interfaces/vmm_lazy_fpu.c
new file mode 100644
index 0000000..7562ba0
--- /dev/null
+++ b/palacios/src/interfaces/vmm_lazy_fpu.c
@@ -0,0 +1,36 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Peter Dinda
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks = 0;
+
+
+void V3_Init_Lazy_FPU(struct v3_lazy_fpu_iface * lazy_fpu_iface)
+{
+    palacios_lazy_fpu_hooks = lazy_fpu_iface;
+}
diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile
index 18cdea1..2ad3889 100644
--- a/palacios/src/palacios/Makefile
+++ b/palacios/src/palacios/Makefile
@@ -20,6 +20,7 @@ obj-y := \
 	vmm_io.o \
 	vmm_lock.o \
 	vmm_mem.o \
+	vmm_fp.o \
 	vmm_msr.o \
 	vmm_paging.o \
 	vmm_options.o \
diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c
index 71e62d6..05d4b7a 100644
--- a/palacios/src/palacios/svm.c
+++ b/palacios/src/palacios/svm.c
@@ -39,6 +39,7 @@
 #include
 #include
 
+#include
 
 #ifdef V3_CONFIG_CHECKPOINT
@@ -666,6 +667,8 @@ int v3_svm_enter(struct guest_info * info) {
     guest_state->rip = info->rip;
     guest_state->rsp = info->vm_regs.rsp;
 
+    V3_FP_ENTRY_RESTORE(info);
+
 #ifdef V3_CONFIG_SYMCALL
     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
 	update_irq_entry_state(info);
@@ -733,6 +736,8 @@ int v3_svm_enter(struct guest_info * info) {
 
     info->num_exits++;
 
+    V3_FP_EXIT_SAVE(info);
+
     // Save Guest state from VMCB
     info->rip = guest_state->rip;
     info->vm_regs.rsp = guest_state->rsp;
@@ -823,7 +828,9 @@ int v3_start_svm_guest(struct guest_info * info) {
 	info->core_run_state = CORE_RUNNING;
     } else {
 	PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);
-
+
+	V3_NO_WORK(info);
+
 	while (info->core_run_state == CORE_STOPPED) {
 
 	    if (info->vm_info->run_state == VM_STOPPED) {
@@ -831,9 +838,12 @@ int v3_start_svm_guest(struct guest_info * info) {
 		return 0;
 	    }
 
-	    v3_yield(info,-1);
+	    V3_STILL_NO_WORK(info);
+
 	    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
 	}
 
+	V3_HAVE_WORK_AGAIN(info);
+
 	PrintDebug(info->vm_info, info, "SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c
index cfef4f9..8c34b68 100644
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -55,6 +55,8 @@ int v3_dbg_enable = 0;
 static void init_cpu(void * arg) {
     uint32_t cpu_id = (uint32_t)(addr_t)arg;
 
+    v3_init_fp();
+
 #ifdef V3_CONFIG_SVM
     if (v3_is_svm_capable()) {
 	PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
@@ -100,6 +102,9 @@ static void deinit_cpu(void * arg) {
 	    PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
 	    break;
     }
+
+    v3_deinit_fp();
+
 }
 
 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
@@ -689,6 +694,7 @@ static int sim_callback(struct guest_info * core, void * private_data) {
     V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
 
     while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
+	// We spin here if there is no one to yield to
 	v3_yield(NULL,-1);
     }
 
@@ -759,7 +765,8 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
 	if (all_blocked == 1) {
 	    break;
 	}
-
+
+	// Intentionally spin if there is no one to yield to
 	v3_yield(NULL,-1);
     }
 
diff --git a/palacios/src/palacios/vmm_barrier.c b/palacios/src/palacios/vmm_barrier.c
index 35efe0f..ba88e2b 100644
--- a/palacios/src/palacios/vmm_barrier.c
+++ b/palacios/src/palacios/vmm_barrier.c
@@ -120,6 +120,7 @@ int v3_wait_for_barrier(struct v3_vm_info * vm_info, struct guest_info * local_c
 	    break;
 	}
 
+	// return immediately and spin if there is no one to yield to
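+	// (Aside: a minimal sketch of the strategy-driven-yield pattern used
+	// in svm.c/vmx.c above, assuming a core-pinned thread; work_available()
+	// is hypothetical:
+	//     V3_NO_WORK(core);
+	//     while (!work_available(core)) {
+	//         V3_STILL_NO_WORK(core);
+	//     }
+	//     V3_HAVE_WORK_AGAIN(core);
+	// )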
 	v3_yield(local_core,-1);
     }
 
@@ -198,6 +199,10 @@ int v3_wait_at_barrier(struct guest_info * core) {
 	return 0;
     }
 
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+    v3_get_fp_state(core); // snapshot FP state now regardless of lazy eval
+#endif
+
     V3_Print(core->vm_info, core, "Core %d waiting at barrier\n", core->vcpu_id);
 
     /* Barrier has been activated.
@@ -211,8 +216,13 @@ int v3_wait_at_barrier(struct guest_info * core) {
 
     // wait for cpu bit to clear
     while (v3_bitmap_check(&(barrier->cpu_map), core->vcpu_id)) {
+	// Barrier wait will spin if there is no competing work
 	v3_yield(core,-1);
     }
 
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+    core->fp_state.need_restore=1;  // restore FP on next entry
+#endif
+
     return 0;
 }
diff --git a/palacios/src/palacios/vmm_checkpoint.c b/palacios/src/palacios/vmm_checkpoint.c
index 92c5e16..7a564e6 100644
--- a/palacios/src/palacios/vmm_checkpoint.c
+++ b/palacios/src/palacios/vmm_checkpoint.c
@@ -412,15 +412,15 @@ struct mem_migration_state {
     struct v3_bitmap modified_pages;
 };
 
-static int paging_callback(struct guest_info *core,
-			   struct v3_shdw_pg_event *event,
-			   void *priv_data)
+static int shadow_paging_callback(struct guest_info *core,
+				  struct v3_shdw_pg_event *event,
+				  void *priv_data)
 {
     struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
 
     if (event->event_type==SHADOW_PAGEFAULT &&
 	event->event_order==SHADOW_PREIMPL &&
-	event->error_code.write) {
+	event->error_code.write) { // Note, assumes VTLB behavior where we will see the write even if preceded by a read
 	addr_t gpa;
 	if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
 	    // write to this page
@@ -434,7 +434,30 @@ static int paging_callback(struct guest_info *core,
 
     return 0;
 }
-
+
+
+/*
+static int nested_paging_callback(struct guest_info *core,
+				  struct v3_nested_pg_event *event,
+				  void *priv_data)
+{
+    struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
+
+    if (event->event_type==NESTED_PAGEFAULT &&
+	event->event_order==NESTED_PREIMPL &&
+	event->error_code.write) { // Assumes we will see a write after reads
+	if (event->gpa < core->vm_info->mem_size) {
+	    v3_bitmap_set(&(m->modified_pages),(event->gpa)>>12);
+	} else {
+	    // no worries, this isn't physical memory
+	}
+    } else {
+	// we don't care about other events
+    }
+
+    return 0;
+}
+*/
 
 static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
@@ -456,10 +479,27 @@ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
 	V3_Free(m);
     }
 
-    v3_register_shadow_paging_event_callback(vm,paging_callback,m);
+    // We assume that the migrator has already verified that all cores are
+    // using the identical model (shadow or nested)
+    // This must not change over the execution of the migration
 
-    for (i=0;i<vm->num_cores;i++) {
+    if (vm->cores[0].shdw_pg_mode==SHADOW_PAGING) {
+	v3_register_shadow_paging_event_callback(vm,shadow_paging_callback,m);
+
+	for (i=0;i<vm->num_cores;i++) {
+	    v3_invalidate_shadow_pts(&(vm->cores[i]));
+	}
+    } else if (vm->cores[0].shdw_pg_mode==NESTED_PAGING) {
+	//v3_register_nested_paging_event_callback(vm,nested_paging_callback,m);
+
+	for (i=0;i<vm->num_cores;i++) {
+	    //v3_invalidate_nested_addr_range(&(vm->cores[i]),0,vm->mem_size-1);
+	}
+    } else {
+	PrintError(vm, VCORE_NONE, "Unsupported paging mode\n");
+	v3_bitmap_deinit(&(m->modified_pages));
+	V3_Free(m);
+	return 0;
     }
 
     // and now we should get callbacks as writes happen
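+    // (A sketch of how a migrator might consume the dirty map once tracking
+    // stops; resend_page() is hypothetical:
+    //     addr_t gpa;
+    //     for (gpa=0; gpa < m->vm->mem_size; gpa += 4096) {
+    //         if (v3_bitmap_check(&(m->modified_pages), gpa>>12)) {
+    //             resend_page(m->vm, gpa);
+    //         }
+    //     }
+    // )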
@@ -469,11 +509,15 @@
 
 static void stop_page_tracking(struct mem_migration_state *m)
 {
-    v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
-
-    v3_bitmap_deinit(&(m->modified_pages));
+    if (m->vm->cores[0].shdw_pg_mode==SHADOW_PAGING) {
+	v3_unregister_shadow_paging_event_callback(m->vm,shadow_paging_callback,m);
+    } else {
+	//v3_unregister_nested_paging_event_callback(m->vm,nested_paging_callback,m);
+    }
 
-    V3_Free(m);
+    v3_bitmap_deinit(&(m->modified_pages));
+
+    V3_Free(m);
 }
 
@@ -731,6 +775,10 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
 	PrintError(info->vm_info, info, "Could not open context to load core\n");
 	goto loadfailout;
     }
+
+    // Run state is needed to determine when AP cores need
+    // to be immediately run after resume
+    V3_CHKPT_LOAD(ctx,"run_state",info->core_run_state,loadfailout);
 
     V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
@@ -798,6 +846,11 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
     V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
     V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
 
+    // floating point
+    if (v3_load_fp_state(ctx,info)) {
+	goto loadfailout;
+    }
+
     v3_chkpt_close_ctx(ctx); ctx=0;
 
     PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");
@@ -912,6 +965,7 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
 	goto savefailout;
     }
 
+    V3_CHKPT_SAVE(ctx,"run_state",info->core_run_state,savefailout);
 
     V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
@@ -979,6 +1033,11 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
     V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
     V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
 
+    // floating point
+    if (v3_save_fp_state(ctx,info)) {
+	goto savefailout;
+    }
+
     v3_chkpt_close_ctx(ctx); ctx=0;
 
     if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
@@ -1200,11 +1259,15 @@ int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_
     struct mem_migration_state *mm_state;
     int i;
 
-    // Currently will work only for shadow paging
-    for (i=0;i<vm->num_cores;i++) {
-	if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
-	    PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
-	    return -1;
+    // Cores must all be in the same mode
+    // or we must be skipping memory
+    if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
+	v3_paging_mode_t mode = vm->cores[0].shdw_pg_mode;
+	for (i=1;i<vm->num_cores;i++) {
+	    if (vm->cores[i].shdw_pg_mode != mode) {
+		PrintError(vm, VCORE_NONE, "Cores having different paging modes (nested and shadow) are not supported\n");
+		return -1;
+	    }
 	}
     }
 
diff --git a/palacios/src/palacios/vmm_fp.c b/palacios/src/palacios/vmm_fp.c
new file mode 100644
index 0000000..d3b6ca3
--- /dev/null
+++ b/palacios/src/palacios/vmm_fp.c
@@ -0,0 +1,179 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, Peter Dinda
+ * Copyright (c) 2013, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Peter Dinda
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include
+#include
+#include
+#include
+
+#ifdef V3_CONFIG_CHECKPOINT
+#include
+#endif
+
+
+static int can_do_fp=-1;
+
+// assumes identical on all cores...
+int v3_can_handle_fp_state()
+{
+    if (can_do_fp!=-1) {
+	return can_do_fp;
+    } else {
+	uint32_t eax, ebx, ecx, edx;
+
+	v3_cpuid(CPUID_FEATURE_IDS,&eax,&ebx,&ecx,&edx);
+
+	can_do_fp= !!(edx & (1<<25)); // do we have SSE?
+
+	return can_do_fp;
+    }
+}
+
+int v3_init_fp()
+{
+    if (v3_can_handle_fp_state()) {
+	V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore init: available on this hardware\n");
+    } else {
+	V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore init: UNAVAILABLE ON THIS HARDWARE\n");
+    }
+    return 0;
+}
+
+int v3_deinit_fp()
+{
+    V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore deinited\n");
+    return 0;
+}
+
+#define EFER_MSR 0xc0000080
+
+
+int v3_get_fp_state(struct guest_info *core)
+{
+    if (v3_can_handle_fp_state()) {
+
+	/*
+	  If the fast-FXSAVE/FXRSTOR (FFXSR) feature is enabled in EFER,
+	  FXSAVE and FXRSTOR do not save or restore the XMM0-15 registers
+	  when executed in 64-bit mode at CPL 0.  The x87 environment and
+	  MXCSR are saved whether fast-FXSAVE/FXRSTOR is enabled or not.
+	  Software can use the CPUID instruction to determine whether the
+	  fast-FXSAVE/FXRSTOR feature is available
+	  (CPUID Fn8000_0001h_EDX[FFXSR]).  The fast-FXSAVE/FXRSTOR feature
+	  has no effect on FXSAVE/FXRSTOR in non 64-bit mode or when CPL > 0.
+	*/
+
+	// We need to ensure that fast-FXSAVE/FXRSTOR is not on,
+	// otherwise we will NOT have the XMM regs since we are running at CPL 0
+	//
+
+	int restore=0;
+	uint32_t high,low;
+
+	v3_get_msr(EFER_MSR,&high,&low);
+
+	if (low & (0x1<<14)) {
+	    // fast save is in effect
+	    low &= ~(0x1<<14);
+	    restore=1;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	__asm__ __volatile__(" rex64/fxsave %0 ; "
+			     : "=m"(core->fp_state.state)); /* no input, no clobber */
+
+	if (restore) {
+	    low |= 0x1<<14;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	// this is a giant guess
+	// we really need to capture the state type as seen in the guest, not here...
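+	// (A hedged alternative, untested and not what the code below does:
+	// the mode could in principle be derived from the guest's EFER.LMA
+	// and CS.L/CS.D bits, e.g. V3_FP_MODE_64 only when LMA=1 and CS.L=1.)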
+	core->fp_state.state_type=V3_FP_MODE_64;
+
+	return 0;
+
+    } else {
+	return -1;
+    }
+}
+
+
+// Restore FP state from this structure to this core
+int v3_put_fp_state(struct guest_info *core)
+{
+    if (v3_can_handle_fp_state()) {
+	// We need to ensure that fast-FXSAVE/FXRSTOR is not on,
+	// otherwise we will NOT have the XMM regs since we are running at CPL 0
+	//
+
+	int restore=0;
+	uint32_t high,low;
+
+	v3_get_msr(EFER_MSR,&high,&low);
+
+	if (low & (0x1<<14)) {
+	    // fast restore is in effect
+	    low &= ~(0x1<<14);
+	    restore=1;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	__asm__ __volatile__(" rex64/fxrstor %0; "
+			     : /* no output */
+			     : "m"((core->fp_state.state)) ); /* no clobber */
+
+
+	if (restore) {
+	    low |= 0x1<<14;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	return 0;
+    } else {
+	return -1;
+    }
+}
+
+#ifdef V3_CONFIG_CHECKPOINT
+
+
+int v3_save_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core)
+{
+    V3_CHKPT_SAVE(ctx, "FP_STATE_TYPE", core->fp_state.state_type, savefailout);
+
+    if (v3_chkpt_save(ctx,"FP_STATE_BLOB",sizeof(core->fp_state.state),&(core->fp_state.state))) {
+	goto savefailout;
+    }
+
+    return 0;
+
+ savefailout:
+    PrintError(core->vm_info,core,"Unable to save floating point state\n");
+    return -1;
+}
+
+
+int v3_load_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core)
+{
+    V3_CHKPT_LOAD(ctx, "FP_STATE_TYPE", core->fp_state.state_type, loadfailout);
+
+    if (v3_chkpt_load(ctx,"FP_STATE_BLOB",sizeof(core->fp_state.state),&(core->fp_state.state))) {
+	goto loadfailout;
+    }
+
+    return 0;
+
+ loadfailout:
+    PrintError(core->vm_info,core,"Unable to load floating point state\n");
+    return -1;
+}
+
+#endif
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c
index ae3fad4..de81dfc 100644
--- a/palacios/src/palacios/vmx.c
+++ b/palacios/src/palacios/vmx.c
@@ -1028,7 +1028,8 @@ int v3_vmx_enter(struct guest_info * info) {
 	check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
     }
 
-
+    V3_FP_ENTRY_RESTORE(info);
+
     {
 	uint64_t entry_tsc = 0;
@@ -1081,6 +1082,8 @@ int v3_vmx_enter(struct guest_info * info) {
 
     info->num_exits++;
 
+    V3_FP_EXIT_SAVE(info);
+
     /* If we have the preemption time, then use it to get more accurate guest time */
     if (vmx_info->pin_ctrls.active_preempt_timer) {
 	uint32_t cycles_left = 0;
@@ -1187,6 +1190,8 @@ int v3_start_vmx_guest(struct guest_info * info) {
 
     } else {
 	PrintDebug(info->vm_info, info, "VMX core %u: Waiting for core initialization\n", info->vcpu_id);
+
+	V3_NO_WORK(info);
 
 	while (info->core_run_state == CORE_STOPPED) {
 
@@ -1194,11 +1199,13 @@ int v3_start_vmx_guest(struct guest_info * info) {
 		// The VM was stopped before this core was initialized.
 		return 0;
 	    }
-
-	    v3_yield(info,-1);
+
+	    V3_STILL_NO_WORK(info);
+
 	    //PrintDebug(info->vm_info, info, "VMX core %u: still waiting for INIT\n",info->vcpu_id);
 	}
-
+
+	V3_HAVE_WORK_AGAIN(info);
+
 	PrintDebug(info->vm_info, info, "VMX core %u initialized\n", info->vcpu_id);
 
 	// We'll be paranoid about race conditions here