Compile with support for Intel VMX
+
config FRAME_POINTER
bool "Compile with Frame pointers"
default n
endmenu
source "palacios/src/interfaces/Kconfig"
+
+menu "Virtual core specialization"
+
+config CUSTOM_CPUID
+ bool "Use custom CPU information (vendor, etc)"
+ default y
+ help
+ If set, the CPU information will be for a special V3VEE vendor.
+ This should result in identical guest kernel setup, regardless
+ of the underlying hardware, but it also means that the guest kernel
+ has no chance of employing CPU-specific bug fixes.
+
+config STRICT_MSR_SEMANTICS
+ bool "Use strict RDMSR/WRMSR semantics"
+ default y
+ help
+ Use strict MSR semantics - when an unhandled MSR is read or written,
+	a GPF is generated. This is typically used with CUSTOM_CPUID on.
+
+config FP_SWITCH
+ bool "Floating point context switching"
+ default y
+ help
+ If set, floating point is handled for context switches
+ (VM1->VM2->VM1 and/or VM->HOST->VM). This can be disabled
+ for environments where a single VM is the only user of FP.
+ Note that even if disabled, FP save/restore code is included
+ for support of checkpoint/restore.
+
+config LAZY_FP_SWITCH
+ bool "Use host-based lazy floating point context switching"
+ depends on FP_SWITCH && HOST_LAZY_FPU_SWITCH
+ default y
+ help
+ When true, the host's lazy floating point save/restore
+ mechanism is notified on each exit and entry. If false,
+ the floating point state is explicitly saved on each exit
+ and restored on each entry---this save/restore is entirely
+ done in Palacios.
+
+
+endmenu
+
source "palacios/src/extensions/Kconfig"
config TELEMETRY
#include <linux/smp.h>
#include <linux/vmalloc.h>
+#include <asm/i387.h>
+
#include <palacios/vmm.h>
#include <palacios/vmm_host_events.h>
+
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+#include <interfaces/vmm_lazy_fpu.h>
+#endif
+
#include "palacios.h"
#include "mm.h"
#include "memcheck.h"
#include "lockcheck.h"
+
+
// The following can be used to track heap bugs
// zero memory after allocation
#define ALLOC_ZERO_MEM 0
void * pg_addr = NULL;
if (num_pages<=0) {
- ERROR("ALERT ALERT Attempt to allocate zero or fewer pages\n");
+ ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, constraints 0x%x)\n",num_pages, alignment, node_id, constraints);
return NULL;
}
pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment, node_id, constraints);
if (!pg_addr) {
- ERROR("ALERT ALERT Page allocation has FAILED Warning\n");
+ ERROR("ALERT ALERT Page allocation has FAILED Warning (%d pages, alignment %d, node %d, constraints 0x%x)\n",num_pages, alignment, node_id, constraints);
return NULL;
}
*/
void palacios_free_pages(void * page_paddr, int num_pages) {
+    if (!page_paddr) {
+	ERROR("Ignoring free pages: 0x%p (0x%lx) for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages);
+	dump_stack();
+	// Must bail out here: falling through would pass physical address 0
+	// to free_palacios_pgs() and corrupt the page allocator's accounting.
+	return;
+    }
    pg_frees += num_pages;
    free_palacios_pgs((uintptr_t)page_paddr, num_pages);
    MEMCHECK_FREE_PAGES(page_paddr,num_pages*4096);
void * addr
)
{
+    if (!addr) {
+	ERROR("Ignoring free : 0x%p\n", addr);
+	dump_stack();
+	// Must return: kfree(addr-ALLOC_PAD) on a NULL addr would hand the
+	// kernel allocator a bogus pointer just below address zero.
+	return;
+    }
    frees++;
    kfree(addr-ALLOC_PAD);
    MEMCHECK_KFREE(addr-ALLOC_PAD);
allow_signal(SIGKILL);
*/
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+ // We are a kernel thread that needs FPU save/restore state
+ // vcores definitely need this, all the other threads get it too,
+ // but they just won't use it
+ fpu_alloc(&(current->thread.fpu));
+#endif
ret = thread_info->fn(thread_info->arg);
-
INFO("Palacios Thread (%s) EXITING\n", thread_info->name);
palacios_free(thread_info);
// handle cleanup
+ // We rely on do_exit to free the fpu data
+ // since we could get switched at any point until the thread is done...
+
do_exit(ret);
-
+
return 0; // should not get here.
}
LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags);
}
+// Lazy-FPU hook: the calling VMM thread has dirtied the FPU.
+// Marks the current Linux task as an FPU user so the host's lazy FPU
+// machinery will save our FP state if it switches to another FPU user.
+void palacios_used_fpu(void)
+{
+    struct thread_info *cur = current_thread_info();
+
+    // We assume we are not preemptible here...
+    cur->status |= TS_USEDFPU;
+    // Clear CR0.TS so subsequent FP use in this thread does not fault (#NM)
+    clts();
+    // After this, FP Save should be handled by Linux if it
+    // switches to a different task and that task uses FPU
+}
+
+// Returns nonzero if CR0.TS is set, i.e. the host has flagged that an FP
+// state restore is pending for this task.
+// Made static: this helper is file-local (not declared in palacios.h), and
+// a plain non-static "inline" definition in a .c file has fragile linkage
+// under C99 semantics (no external definition is emitted if the compiler
+// declines to inline a call site).
+static inline int ists(void)
+{
+    return read_cr0() & X86_CR0_TS;
+}
+// Lazy-FPU hook: the calling VMM thread is about to use the FPU again.
+// If the host marked our FP state as not-loaded (CR0.TS set), restore it
+// now so guest entry sees the correct FP context.
+void palacios_need_fpu(void)
+{
+    // We assume we are not preemptible here...
+    if (ists()) {
+	// we have been switched back to from somewhere else...
+	// Do a restore now - this will also do a clts()
+	math_state_restore();
+    }
+}
+
+
/**
* Structure used by the Palacios hypervisor to interface with the host kernel.
*/
};
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+// Note that this host interface is defined here since it's
+// intertwined with thread creation...
+static struct v3_lazy_fpu_iface palacios_fpu_hooks = {
+ .used_fpu = palacios_used_fpu,
+ .need_fpu = palacios_need_fpu
+};
+
+#endif
int palacios_vmm_init( char *options )
Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options);
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+ V3_Init_Lazy_FPU(&palacios_fpu_hooks);
+#endif
+
return 0;
}
void palacios_sleep_cpu(unsigned int us);
unsigned int palacios_get_cpu(void);
unsigned int palacios_get_cpu_khz(void);
+void palacios_used_fpu(void);
+void palacios_need_fpu(void);
void *palacios_mutex_alloc(void); // allocates and inits a lock
void palacios_mutex_init(void *mutex); // only inits a lock
void palacios_mutex_deinit(void *mutex); // only deinits a lock
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_LAZY_FPU
+#define __VMM_LAZY_FPU
+
+#include <palacios/vmm_types.h>
+
+
+struct v3_lazy_fpu_iface {
+
+ // if these two are provided then lazy FP save/restore handled by host
+ // indicate that the calling thread has used floating point
+ void (*used_fpu)(void);
+ // indicate that the calling thread wants to use floating point again
+ void (*need_fpu)(void);
+
+};
+
+
+/*
+ * function prototypes
+ */
+
+extern void V3_Init_Lazy_FPU(struct v3_lazy_fpu_iface * palacios_lazy_fpu);
+
+#ifdef __V3VEE__
+
+#define V3_LAZY_FPU_USED() \
+ do { \
+ extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks; \
+ if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->used_fpu) { \
+ (palacios_lazy_fpu_hooks)->used_fpu(); \
+ } \
+ } while (0)
+
+#define V3_LAZY_FPU_NEED() \
+ do { \
+ extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks; \
+ if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->need_fpu) { \
+ (palacios_lazy_fpu_hooks)->need_fpu(); \
+ } \
+ } while (0)
+
+#endif
+
+#endif
#include <palacios/vmm_mem_hook.h>
#include <palacios/vmm_io.h>
#include <palacios/vmm_shadow_paging.h>
+#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_intr.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_dev_mgr.h>
#include <palacios/vmm_events.h>
#include <palacios/vmm_scheduler.h>
#include <palacios/vmm_fw_cfg.h>
-
+#include <palacios/vmm_fp.h>
#include <palacios/vmm_perftune.h>
#ifdef V3_CONFIG_TELEMETRY
v3_paging_mode_t shdw_pg_mode;
struct v3_shdw_pg_state shdw_pg_state;
+ //struct v3_nested_pg_state nested_pg_state;
addr_t direct_map_pt;
struct v3_segments segments;
struct v3_msrs msrs;
+ struct v3_fp_state fp_state;
void * vmm_data;
struct v3_mem_hooks mem_hooks;
struct v3_shdw_impl_state shdw_impl;
+ //struct v3_nested_impl_state nested_impl;
void * sched_priv_data;
struct v3_io_map io_map;
unsigned int (*get_cpu)(void);
-
-
void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name);
void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_FP_H
+#define __VMM_FP_H
+
+#include <palacios/vmm_types.h>
+#include <palacios/vmm.h>
+// Kconfig option is LAZY_FP_SWITCH, so the generated symbol is
+// V3_CONFIG_LAZY_FP_SWITCH; the previous spelling
+// V3_CONFIG_LAZY_FPU_SWITCH is never defined, leaving the include dead.
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+#include <interfaces/vmm_lazy_fpu.h>
+#endif
+
+// the FPRs are arranged into the
+// precise layout of the FXSAVE/FXRESTORE instructions
+// bytes 32+, which is common for all three variants
+// 8*6 reserved + 8*10 (fpu/mmx) + 16*16 (xmm)
+// + 3*16 (res) + 3*16 (ava) = 480 bytes
+// another 32 bytes are used for the store header
+// which varies depending on machine mode
+struct v3_fp_regs {
+ v3_fp_mmx_reg_t stmm0; // stmm0..7 are the x87 stack or mmx regs
+ uint8_t res0[6];
+ v3_fp_mmx_reg_t stmm1;
+ uint8_t res1[6];
+ v3_fp_mmx_reg_t stmm2;
+ uint8_t res2[6];
+ v3_fp_mmx_reg_t stmm3;
+ uint8_t res3[6];
+ v3_fp_mmx_reg_t stmm4;
+ uint8_t res4[6];
+ v3_fp_mmx_reg_t stmm5;
+ uint8_t res5[6];
+ v3_fp_mmx_reg_t stmm6;
+ uint8_t res6[6];
+ v3_fp_mmx_reg_t stmm7;
+ uint8_t res7[6];
+ v3_xmm_reg_t xmm0; // xmm0..7 are the "classic" SSE regs
+ v3_xmm_reg_t xmm1;
+ v3_xmm_reg_t xmm2;
+ v3_xmm_reg_t xmm3;
+ v3_xmm_reg_t xmm4;
+ v3_xmm_reg_t xmm5;
+ v3_xmm_reg_t xmm6;
+ v3_xmm_reg_t xmm7;
+ v3_xmm_reg_t xmm8; //xmm8..15 are the "new" SSE reg
+ v3_xmm_reg_t xmm9;
+ v3_xmm_reg_t xmm10;
+ v3_xmm_reg_t xmm11;
+ v3_xmm_reg_t xmm12;
+ v3_xmm_reg_t xmm13;
+ v3_xmm_reg_t xmm14;
+ v3_xmm_reg_t xmm15;
+ v3_xmm_reg_t res16; // reserved
+ v3_xmm_reg_t res17;
+ v3_xmm_reg_t res18;
+ v3_xmm_reg_t ava19;
+ v3_xmm_reg_t ava20;
+ v3_xmm_reg_t ava21;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+// FXSAVE, 32 bit mode header (32 bytes)
+// V3_FP_MODE_32
+struct v3_fp_32_state {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t res0;
+ uint16_t fop;
+ uint32_t fip; //fpu instruction pointer
+ uint16_t fcs; //fpu code segment selector
+ uint16_t res1;
+ uint32_t fdp; //fpu data pointer
+ uint16_t fds; //fpu data segment selector
+ uint16_t res2;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+// FXSAVE, 64 bit mode header, REX.W=1 (32 bytes)
+// V3_FP_MODE_64
+struct v3_fp_64_state {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t res0;
+ uint16_t fop;
+ uint64_t fip; //fpu instruction pointer
+ uint64_t fdp; //fpu data pointer
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+
+// FXSAVE, 64 bit mode header, REX.W=0 (32 bytes)
+// V3_FP_MODE_64_COMPAT
+struct v3_fp_64compat_state {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t res0;
+ uint16_t fop;
+ uint32_t fip; //fpu instruction pointer
+ uint16_t fcs; //fpu code segment selector
+ uint16_t res1;
+ uint32_t fdp; //fpu data pointer
+ uint16_t fds; //fpu data segment selector
+ uint16_t res2;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+
+//
+// This is an FXSAVE block
+//
+struct v3_fp_state_core {
+ union {
+ struct v3_fp_32_state fp32;
+ struct v3_fp_64_state fp64;
+ struct v3_fp_64compat_state fp64compat;
+ } header;
+ struct v3_fp_regs fprs;
+} __attribute__((packed)) __attribute__((aligned(16)));
+
+struct v3_fp_state {
+ // Do we need to restore on next entry?
+ int need_restore;
+ // The meaning
+ enum {V3_FP_MODE_32=0, V3_FP_MODE_64, V3_FP_MODE_64_COMPAT} state_type;
+ struct v3_fp_state_core state __attribute__((aligned(16)));
+} ;
+
+
+struct guest_info;
+
+// Can we save FP state on this core?
+int v3_can_handle_fp_state();
+
+// Save state from this core to the structure
+int v3_get_fp_state(struct guest_info *core);
+
+// Restore FP state from this structure to this core
+int v3_put_fp_state(struct guest_info *core);
+
+int v3_init_fp(void);
+int v3_deinit_fp(void);
+
+#ifndef V3_CONFIG_FP_SWITCH
+
+// FP switching compiled out: exit/entry hooks are no-ops
+#define V3_FP_EXIT_SAVE(core)
+#define V3_FP_ENTRY_RESTORE(core)
+
+#else
+
+// Kconfig option LAZY_FP_SWITCH => generated symbol V3_CONFIG_LAZY_FP_SWITCH
+// (the former V3_CONFIG_LAZY_FPU_SWITCH was never defined by the build)
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+
+
+/* Ideally these would use the TS trick to do lazy calls to used_fpu() */
+// The hook registry is palacios_lazy_fpu_hooks (struct v3_lazy_fpu_iface),
+// defined in vmm_lazy_fpu.c; the former names (struct v3_lazy_fpu_hooks /
+// lazy_fpu_hooks / will_use_fpu) do not exist and would not compile.
+#define V3_FP_EXIT_SAVE(core)						\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->used_fpu) { \
+	    (palacios_lazy_fpu_hooks)->used_fpu();			\
+	} else {							\
+	    v3_get_fp_state(core);					\
+	}								\
+    } while (0)
+
+#define V3_FP_ENTRY_RESTORE(core)					\
+    do {								\
+	extern struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks;	\
+	if ((core)->fp_state.need_restore) {				\
+	    v3_put_fp_state(core);					\
+	    (core)->fp_state.need_restore=0;				\
+	} else {							\
+	    if ((palacios_lazy_fpu_hooks) && (palacios_lazy_fpu_hooks)->need_fpu) { \
+		(palacios_lazy_fpu_hooks)->need_fpu();			\
+	    } else {							\
+		v3_put_fp_state(core);					\
+	    }								\
+	}								\
+    } while (0)
+
+#else
+
+// conservative FPU switching
+
+#define V3_FP_EXIT_SAVE(core)  v3_get_fp_state(core)
+#define V3_FP_ENTRY_RESTORE(core) v3_put_fp_state(core)
+
+#endif
+
+#endif
+
+#ifdef V3_CONFIG_CHECKPOINT
+
+struct v3_chkpt_ctx;
+
+// save state from structure to checkpoint/migration context
+int v3_save_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core);
+
+// load state from checkpoint/migration context to structure
+int v3_load_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core);
+
+
+#endif
+
+#endif
#include <palacios/vmm_types.h>
+#include <palacios/vmm_time.h>
struct v3_yield_strategy {
enum {
uint64_t v3_cycle_diff_in_usec(struct guest_info *core, uint64_t earlier_cycles, uint64_t later_cycles);
+// The following three macros are intended to make it easy to
+// use strategy-driven yield. Call the first one when you are out of work
+// then call the second when each time that you want to yield because you are
+// out of work, and then call the third one when you have work to do again
+//
+// This assumes the thread is locked to a core and may behave strangely if
+// this is not the case.
+
+#define V3_NO_WORK(core) { \
+ uint64_t _v3_strat_local_first=0, _v3_strat_local_cur=0; \
+ _v3_strat_local_first=v3_get_host_time(core ? &(core->time_state) : 0);
+
+
+#define V3_STILL_NO_WORK(core) \
+ _v3_strat_local_cur=v3_get_host_time(core ? &(core->time_state) : 0); \
+ v3_strategy_driven_yield(core,v3_cycle_diff_in_usec(core,_v3_strat_local_first,_v3_strat_local_cur));
+
+#define V3_HAVE_WORK_AGAIN(core) }
#endif
typedef ulong_t addr_t;
typedef ullong_t v3_reg_t;
+
+typedef uint8_t v3_xmm_reg_t[16];
+typedef uint8_t v3_fp_mmx_reg_t[10];
+
#endif /* ! __V3VEE__ */
#endif
help
Select this if you would like to access energy/power
measurements within Palacios
+
+config HOST_LAZY_FPU_SWITCH
+ bool "Host provides lazy FPU context switching"
+ default n
+ help
+ Select this if your host provides lazy context switch support
+ for floating point state and you would like Palacios to use it
endmenu
obj-$(V3_CONFIG_HOST_PCI) += host_pci.o
obj-$(V3_CONFIG_HOST_PMU) += vmm_pmu.o
obj-$(V3_CONFIG_HOST_PWRSTAT) += vmm_pwrstat.o
+obj-$(V3_CONFIG_HOST_LAZY_FPU_SWITCH) += vmm_lazy_fpu.o
obj-y += null.o
obj-y += vmm_numa.o
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_debug.h>
+#include <palacios/vmm_types.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_lowlevel.h>
+
+#include <interfaces/vmm_lazy_fpu.h>
+
+// Global registry of host-provided lazy FPU hooks; NULL until the host
+// calls V3_Init_Lazy_FPU().  Consulted via an extern declaration by the
+// V3_LAZY_FPU_* / V3_FP_* macros.
+struct v3_lazy_fpu_iface * palacios_lazy_fpu_hooks = 0;
+
+
+
+// Record the host's lazy floating point save/restore interface.
+// No copy is made: lazy_fpu_iface must remain valid for the lifetime of
+// the VMM.  Passing NULL effectively disables the lazy path.
+void V3_Init_Lazy_FPU (struct v3_lazy_fpu_iface * lazy_fpu_iface)
+{
+    palacios_lazy_fpu_hooks = lazy_fpu_iface;
+}
+
+
vmm_io.o \
vmm_lock.o \
vmm_mem.o \
+ vmm_fp.o \
vmm_msr.o \
vmm_paging.o \
vmm_options.o \
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_debug.h>
+#include <palacios/vmm_perftune.h>
#ifdef V3_CONFIG_CHECKPOINT
guest_state->rip = info->rip;
guest_state->rsp = info->vm_regs.rsp;
+ V3_FP_ENTRY_RESTORE(info);
+
#ifdef V3_CONFIG_SYMCALL
if (info->sym_core_state.symcall_state.sym_call_active == 0) {
update_irq_entry_state(info);
info->num_exits++;
+ V3_FP_EXIT_SAVE(info);
+
// Save Guest state from VMCB
info->rip = guest_state->rip;
info->vm_regs.rsp = guest_state->rsp;
info->core_run_state = CORE_RUNNING;
} else {
PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);
-
+
+ V3_NO_WORK(info);
+
while (info->core_run_state == CORE_STOPPED) {
if (info->vm_info->run_state == VM_STOPPED) {
return 0;
}
- v3_yield(info,-1);
+ V3_STILL_NO_WORK(info);
+
//PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
}
+
+ V3_HAVE_WORK_AGAIN(info);
PrintDebug(info->vm_info, info, "SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
static void init_cpu(void * arg) {
uint32_t cpu_id = (uint32_t)(addr_t)arg;
+ v3_init_fp();
+
#ifdef V3_CONFIG_SVM
if (v3_is_svm_capable()) {
PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
break;
}
+
+ v3_deinit_fp();
+
}
void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
+ // We spin here if there is noone to yield to
v3_yield(NULL,-1);
}
if (all_blocked == 1) {
break;
}
-
+
+ // Intentionally spin if there is no one to yield to
v3_yield(NULL,-1);
}
break;
}
+ // return immediately and spin if there is no one to yield to
v3_yield(local_core,-1);
}
return 0;
}
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+ v3_get_fp_state(core); // snapshot FP state now regardless of lazy eval
+#endif
+
V3_Print(core->vm_info, core, "Core %d waiting at barrier\n", core->vcpu_id);
/* Barrier has been activated.
// wait for cpu bit to clear
while (v3_bitmap_check(&(barrier->cpu_map), core->vcpu_id)) {
+ // Barrier wait will spin if there is no competing work
v3_yield(core,-1);
}
+
+// NOTE: was "#ifdef V3_LAZY_FP_SWITCH", which no build ever defines --
+// the generated symbol for Kconfig LAZY_FP_SWITCH is
+// V3_CONFIG_LAZY_FP_SWITCH.  Without this fix the need_restore flag was
+// never set and FP state was silently not restored after a barrier.
+#ifdef V3_CONFIG_LAZY_FP_SWITCH
+    core->fp_state.need_restore=1; // restore FP on next entry
+#endif
return 0;
}
struct v3_bitmap modified_pages;
};
-static int paging_callback(struct guest_info *core,
- struct v3_shdw_pg_event *event,
- void *priv_data)
+static int shadow_paging_callback(struct guest_info *core,
+ struct v3_shdw_pg_event *event,
+ void *priv_data)
{
struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
if (event->event_type==SHADOW_PAGEFAULT &&
event->event_order==SHADOW_PREIMPL &&
- event->error_code.write) {
+ event->error_code.write) { // Note, assumes VTLB behavior where we will see the write even if preceded by a read
addr_t gpa;
if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
// write to this page
return 0;
}
-
+
+
+/*
+static int nested_paging_callback(struct guest_info *core,
+ struct v3_nested_pg_event *event,
+ void *priv_data)
+{
+ struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
+
+ if (event->event_type==NESTED_PAGEFAULT &&
+ event->event_order==NESTED_PREIMPL &&
+ event->error_code.write) { // Assumes we will see a write after reads
+ if (event->gpa<core->vm_info->mem_size) {
+ v3_bitmap_set(&(m->modified_pages),(event->gpa)>>12);
+ } else {
+ // no worries, this isn't physical memory
+ }
+ } else {
+ // we don't care about other events
+ }
+
+ return 0;
+}
+*/
static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
V3_Free(m);
}
- v3_register_shadow_paging_event_callback(vm,paging_callback,m);
+ // We assume that the migrator has already verified that all cores are
+ // using the identical model (shadow or nested)
+ // This must not change over the execution of the migration
- for (i=0;i<vm->num_cores;i++) {
+ if (vm->cores[0].shdw_pg_mode==SHADOW_PAGING) {
+ v3_register_shadow_paging_event_callback(vm,shadow_paging_callback,m);
+
+ for (i=0;i<vm->num_cores;i++) {
v3_invalidate_shadow_pts(&(vm->cores[i]));
+ }
+ } else if (vm->cores[0].shdw_pg_mode==NESTED_PAGING) {
+ //v3_register_nested_paging_event_callback(vm,nested_paging_callback,m);
+
+ for (i=0;i<vm->num_cores;i++) {
+ //v3_invalidate_nested_addr_range(&(vm->cores[i]),0,vm->mem_size-1);
+ }
+ } else {
+ PrintError(vm, VCORE_NONE, "Unsupported paging mode\n");
+ v3_bitmap_deinit(&(m->modified_pages));
+ V3_Free(m);
+ return 0;
}
// and now we should get callbacks as writes happen
static void stop_page_tracking(struct mem_migration_state *m)
{
- v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
-
- v3_bitmap_deinit(&(m->modified_pages));
+ if (m->vm->cores[0].shdw_pg_mode==SHADOW_PAGING) {
+ v3_unregister_shadow_paging_event_callback(m->vm,shadow_paging_callback,m);
+ } else {
+ //v3_unregister_nested_paging_event_callback(m->vm,nested_paging_callback,m);
+ }
- V3_Free(m);
+ v3_bitmap_deinit(&(m->modified_pages));
+
+ V3_Free(m);
}
PrintError(info->vm_info, info, "Could not open context to load core\n");
goto loadfailout;
}
+
+ // Run state is needed to determine when AP cores need
+ // to be immediately run after resume
+ V3_CHKPT_LOAD(ctx,"run_state",info->core_run_state,loadfailout);
V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
+ // floating point
+ if (v3_load_fp_state(ctx,info)) {
+ goto loadfailout;
+ }
+
v3_chkpt_close_ctx(ctx); ctx=0;
PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");
goto savefailout;
}
+ V3_CHKPT_SAVE(ctx,"run_state",info->core_run_state,savefailout);
V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
+ // floating point
+ if (v3_save_fp_state(ctx,info)) {
+ goto savefailout;
+ }
+
v3_chkpt_close_ctx(ctx); ctx=0;
if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
struct mem_migration_state *mm_state;
int i;
- // Currently will work only for shadow paging
- for (i=0;i<vm->num_cores;i++) {
- if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
- PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
- return -1;
+    // Cores must all be in the same mode
+    // or we must be skipping memory
+ if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
+ v3_paging_mode_t mode = vm->cores[0].shdw_pg_mode;
+ for (i=1;i<vm->num_cores;i++) {
+ if (vm->cores[i].shdw_pg_mode != mode) {
+ PrintError(vm, VCORE_NONE, "Cores having different paging modes (nested and shadow) are not supported\n");
+ return -1;
+ }
}
}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2013, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_fp.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_lowlevel.h>
+
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>
+#endif
+
+
+// Cached probe result: -1 = not yet probed, 0 = unsupported, 1 = supported.
+static int can_do_fp=-1;
+
+// assumes identical on all cores...
+// Returns nonzero if this CPU can handle FXSAVE/FXRSTOR-style FP state,
+// detected via the SSE feature bit (CPUID.01h:EDX bit 25).
+// NOTE(review): the cache is written without synchronization; benign only
+// if all cores agree, per the assumption above.
+int v3_can_handle_fp_state()
+{
+    if (can_do_fp!=-1) {
+	return can_do_fp;
+    } else {
+	uint32_t eax, ebx, ecx, edx;
+
+	v3_cpuid(CPUID_FEATURE_IDS,&eax,&ebx,&ecx,&edx);
+
+	can_do_fp= !!(edx & (1<<25)); // do we have SSE?
+
+	return can_do_fp;
+    }
+}
+
+// Per-CPU FP support announcement, called from init_cpu().
+// Always returns 0; actual capability is re-checked by every
+// v3_get_fp_state()/v3_put_fp_state() call via v3_can_handle_fp_state().
+int v3_init_fp()
+{
+    if (v3_can_handle_fp_state()) {
+	V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore init: available on this hardware\n");
+    } else {
+	V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore init: UNAVAILABLE ON THIS HARDWARE\n");
+    }
+    return 0;
+}
+
+// Teardown counterpart of v3_init_fp(); no per-CPU state to release,
+// so this only logs.  Always returns 0.
+int v3_deinit_fp()
+{
+    V3_Print(VM_NONE,VCORE_NONE,"Floating point save/restore deinited\n");
+    return 0;
+}
+
+#define EFER_MSR 0xc0000080
+
+
+// Save this physical core's FP/SSE context into core->fp_state via FXSAVE.
+// Temporarily clears EFER.FFXSR (bit 14) if enabled, because fast-FXSAVE
+// skips XMM0-15 when executed at CPL 0 in 64-bit mode (see quote below).
+// Returns 0 on success, -1 if the hardware lacks FXSAVE/SSE support.
+int v3_get_fp_state(struct guest_info *core)
+{
+    if (v3_can_handle_fp_state()) {
+	/*
+	  If the fast-FXSAVE/FXRSTOR (FFXSR) feature is enabled in EFER, FXSAVE and FXRSTOR do not save or restore the XMM0–15 registers when executed in 64-bit mode at CPL 0. The x87 environment and MXCSR are saved whether fast-FXSAVE/FXRSTOR is enabled or not. Software can use the CPUID instruction to determine whether the fast-FXSAVE/FXRSTOR feature is available
+	  (CPUID Fn8000_0001h_EDX[FFXSR]). The fast-FXSAVE/FXRSTOR feature has no effect on FXSAVE/FXRSTOR in non 64-bit mode or when CPL > 0.
+
+	*/
+
+	// We need to assure that the fast-FXSAVE/FXRSTOR are not on
+	// otherwise we will NOT have the XMM regs since we running at CPL 0
+	//
+
+	int restore=0;
+	uint32_t high,low;
+
+	// NOTE(review): assumes v3_get_msr returns EDX in "high" and EAX in
+	// "low"; FFXSR is EFER bit 14, i.e. in the low half -- confirm.
+	v3_get_msr(EFER_MSR,&high,&low);
+
+	if (low & (0x1<<14)) {
+	    // fast save is in effect
+	    low &= ~(0x1<<14);
+	    restore=1;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	// rex64 prefix forces the REX.W=1 (64-bit) FXSAVE header layout
+	__asm__ __volatile__(" rex64/fxsave %0 ; "
+			     : "=m"(core->fp_state.state));  /* no input, no clobber */
+	if (restore) {
+	    // put fast-FXSAVE back the way we found it
+	    low |= 0x1<<14;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	// this is a giant guess
+	// we really need to capture the state type as seen in the guest, not here...
+	core->fp_state.state_type=V3_FP_MODE_64;
+
+	return 0;
+
+    } else {
+	return -1;
+    }
+}
+
+
+// Restore FP state from this structure to this core
+// Mirror of v3_get_fp_state(): loads core->fp_state into the hardware via
+// FXRSTOR, again temporarily clearing EFER.FFXSR (bit 14) so that the XMM
+// registers are actually restored at CPL 0 in 64-bit mode.
+// Returns 0 on success, -1 if the hardware lacks FXRSTOR/SSE support.
+int v3_put_fp_state(struct guest_info *core)
+{
+    if (v3_can_handle_fp_state()) {
+	// We need to assure that the fast-FXSAVE/FXRSTOR are not on
+	// otherwise we will NOT have the XMM regs since we running at CPL 0
+	//
+
+	int restore=0;
+	uint32_t high,low;
+
+	// NOTE(review): as in v3_get_fp_state, assumes low = EAX half of the
+	// MSR; FFXSR is EFER bit 14.
+	v3_get_msr(EFER_MSR,&high,&low);
+
+	if (low & (0x1<<14)) {
+	    // fast restore is in effect
+	    low &= ~(0x1<<14);
+	    restore=1;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	// rex64 prefix forces the REX.W=1 (64-bit) FXRSTOR header layout
+	__asm__ __volatile__(" rex64/fxrstor %0; "
+			     : /* no output */
+			     : "m"((core->fp_state.state)) );  /* no clobber*/
+
+
+	if (restore) {
+	    // put fast-FXRSTOR back the way we found it
+	    low |= 0x1<<14;
+	    v3_set_msr(EFER_MSR, high, low);
+	}
+
+	return 0;
+    } else {
+	return -1;
+    }
+}
+
+#ifdef V3_CONFIG_CHECKPOINT
+
+
+// Serialize core->fp_state (mode tag plus the raw FXSAVE blob) into a
+// checkpoint/migration context.  Returns 0 on success, -1 on failure.
+int v3_save_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core)
+{
+    V3_CHKPT_SAVE(ctx, "FP_STATE_TYPE", core->fp_state.state_type, savefailout);
+    if (v3_chkpt_save(ctx,"FP_STATE_BLOB",sizeof(core->fp_state.state),&(core->fp_state.state))) {
+	goto savefailout;
+    }
+
+    return 0;
+
+ savefailout:
+    PrintError(core->vm_info,core,"Unable to save floating point state\n");
+    return -1;
+}
+
+
+// Deserialize FP state from a checkpoint/migration context into
+// core->fp_state.  Returns 0 on success, -1 on failure.
+// NOTE(review): need_restore is not set here; the caller is responsible
+// for arranging the actual FXRSTOR before the next guest entry.
+int v3_load_fp_state(struct v3_chkpt_ctx *ctx, struct guest_info *core)
+{
+    V3_CHKPT_LOAD(ctx, "FP_STATE_TYPE", core->fp_state.state_type, loadfailout);
+    if (v3_chkpt_load(ctx,"FP_STATE_BLOB",sizeof(core->fp_state.state),&(core->fp_state.state))) {
+	goto loadfailout;
+    }
+
+    return 0;
+
+ loadfailout:
+    PrintError(core->vm_info,core,"Unable to load floating point state\n");
+    return -1;
+}
+
+#endif
check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
}
-
+
+ V3_FP_ENTRY_RESTORE(info);
{
uint64_t entry_tsc = 0;
info->num_exits++;
+ V3_FP_EXIT_SAVE(info);
+
/* If we have the preemption time, then use it to get more accurate guest time */
if (vmx_info->pin_ctrls.active_preempt_timer) {
uint32_t cycles_left = 0;
} else {
PrintDebug(info->vm_info, info, "VMX core %u: Waiting for core initialization\n", info->vcpu_id);
+
+ V3_NO_WORK(info);
while (info->core_run_state == CORE_STOPPED) {
// The VM was stopped before this core was initialized.
return 0;
}
-
- v3_yield(info,-1);
+
+ V3_STILL_NO_WORK(info);
//PrintDebug(info->vm_info, info, "VMX core %u: still waiting for INIT\n",info->vcpu_id);
}
-
+
+ V3_HAVE_WORK_AGAIN(info);
+
PrintDebug(info->vm_info, info, "VMX core %u initialized\n", info->vcpu_id);
// We'll be paranoid about race conditions here