source "Kconfig.stdlibs"
-
menu "Virtual Paging"
config SHADOW_PAGING
endmenu
+menu "Time Management"
+
+config TIME_VIRTUALIZE_TSC
+ bool "Virtualize guest TSC"
+ default n
+ help
+ Virtualize the processor time stamp counter in the guest,
+ generally increasing consistency between various time sources
+ but also potentially making guest time run slower than real time.
+
+endmenu
+
menu "Symbiotic Functions"
config SYMBIOTIC
help
This turns on debugging for the interrupt system
-
+config DEBUG_TIME
+ bool "Timing"
+ default n
+ depends on DEBUG_ON
+ help
+ This turns on debugging of system time virtualization
config DEBUG_IO
bool "IO"
ALLSOURCE_ARCHS := $(ARCH)
define all-sources
- ( find $(__srctree)/palacios $(RCS_FIND_IGNORE) \
+ ( find $(__srctree)palacios $(RCS_FIND_IGNORE) \
\( -name lib \) -prune -o \
-name '*.[chS]' -print; )
endef
struct v3_sym_core_state sym_core_state;
#endif
+ /* Per-core config tree data. */
+ v3_cfg_tree_t * core_cfg_data;
struct v3_vm_info * vm_info;
#include <palacios/vmm_types.h>
#include <palacios/vmm_list.h>
+#include <palacios/vmm_msr.h>
+#include <palacios/vmm_util.h>
struct guest_info;
struct vm_time {
- uint32_t cpu_freq; // in kHZ
-
- // Total number of guest run time cycles
- uint64_t guest_tsc;
-
- // Cache value to help calculate the guest_tsc
- uint64_t cached_host_tsc;
-
- // The number of cycles pending for notification to the timers
- //ullong_t pending_cycles;
-
- // Installed Timers
+ uint32_t host_cpu_freq; // in kHZ
+ uint32_t guest_cpu_freq; // can be lower than host CPU freq!
+
+ sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+ sint64_t tsc_guest_offset; // Offset of guest TSC from monotonic guest time
+
+ uint64_t last_update; // Last time (in monotonic guest time) the
+ // timers were updated
+
+ uint64_t initial_time; // Time when VMM started.
+
+ struct v3_msr tsc_aux; // Auxiliary MSR for RDTSCP
+
+ // Installed Timers slaved off of the guest monotonic TSC
uint_t num_timers;
struct list_head timers;
};
-
-
-
struct vm_timer_ops {
- void (*update_time)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
+ void (*update_timer)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
void (*advance_timer)(struct guest_info * info, void * private_data);
};
struct list_head timer_link;
};
+// Basic functions for handling passage of time in palacios
+void v3_init_time(struct guest_info * info);
+int v3_start_time(struct guest_info * info);
+int v3_adjust_time(struct guest_info * info);
-
-
+// Basic functions for attaching timers to the passage of time
int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data);
int v3_remove_timer(struct guest_info * info, struct vm_timer * timer);
+void v3_update_timers(struct guest_info * info);
-void v3_advance_time(struct guest_info * info);
+// Functions to return the different notions of time in Palacios.
+static inline uint64_t v3_get_host_time(struct vm_time *t) {
+ uint64_t tmp;
+ rdtscll(tmp);
+ return tmp;
+}
+
+// Returns *monotonic* guest time.
+static inline uint64_t v3_get_guest_time(struct vm_time *t) {
+ return v3_get_host_time(t) + t->guest_host_offset;
+}
+
+// Returns the TSC value seen by the guest
+static inline uint64_t v3_get_guest_tsc(struct vm_time *t) {
+ return v3_get_guest_time(t) + t->tsc_guest_offset;
+}
+
+// Returns offset of guest TSC from host TSC
+static inline sint64_t v3_tsc_host_offset(struct vm_time *time_state) {
+ return time_state->guest_host_offset + time_state->tsc_guest_offset;
+}
+
+// Functions for handling exits on the TSC when fully virtualizing
+// the timestamp counter.
+#define TSC_MSR 0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
-void v3_update_time(struct guest_info * info, ullong_t cycles);
-void v3_init_time(struct guest_info * info);
#endif // !__V3VEE__
#include <palacios/vm_guest.h>
-static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
+static void pit_update_timer(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
struct vm_device * dev = (struct vm_device *)private_data;
struct pit * state = (struct pit *)dev->private_data;
// ullong_t tmp_ctr = state->pit_counter;
return;
}
-
-static void pit_advance_time(struct guest_info * core, void * private_data) {
-
- v3_raise_irq(core->vm_info, 0);
-}
-
-
-
/* This should call out to handle_SQR_WAVE_write, etc...
*/
static int handle_channel_write(struct channel * ch, char val) {
static struct vm_timer_ops timer_ops = {
- .update_time = pit_update_time,
- .advance_timer = pit_advance_time,
+ .update_timer = pit_update_timer,
};
static struct vm_timer_ops timer_ops = {
- .update_time = apic_update_time,
+ .update_timer = apic_update_time,
};
// Sanity check
if (core->cpu_mode != INIT) {
- PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id);
+ PrintError("icc_bus: Warning: core %u is not in INIT state (mode = %d), ignored\n",core->cpu_id, core->cpu_mode);
// Only a warning, since INIT INIT SIPI is common
break;
}
// Sanity check
if (core->cpu_mode!=SIPI) {
- PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id);
+ PrintError("icc_bus: core %u is not in SIPI state (mode = %d), ignored!\n",core->cpu_id, core->cpu_mode);
break;
}
int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, uint32_t extirq) {
- PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq);
-
struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
//
-
-
ctrl_area->svm_instrs.VMRUN = 1;
ctrl_area->svm_instrs.VMMCALL = 1;
ctrl_area->svm_instrs.VMLOAD = 1;
ctrl_area->instrs.CPUID = 1;
ctrl_area->instrs.HLT = 1;
+
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+ ctrl_area->instrs.rdtsc = 1;
+ ctrl_area->svm_instrs.rdtscp = 1;
+#endif
+
// guest_state->cr0 = 0x00000001; // PE
/*
int v3_svm_enter(struct guest_info * info) {
vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
- ullong_t tmp_tsc;
addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
// Conditionally yield the CPU if the timeslice has expired
}
#endif
+ v3_update_timers(info);
- rdtscll(tmp_tsc);
- v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc));
- rdtscll(info->time_state.cached_host_tsc);
- // guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;
+ /* If this guest is frequency-lagged behind host time, wait
+ * for the appropriate host time before resuming the guest. */
+ v3_adjust_time(info);
- //V3_Print("Calling v3_svm_launch\n");
+ guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
+ //V3_Print("Calling v3_svm_launch\n");
v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);
-
- //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
+ //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
- //rdtscll(tmp_tsc);
- // v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
//PrintDebug("SVM Returned\n");
info->num_exits++;
-
-
-
// Save Guest state from VMCB
info->rip = guest_state->rip;
info->vm_regs.rsp = guest_state->rsp;
//PrintDebugVMCB((vmcb_t*)(info->vmm_data));
info->vm_info->run_state = VM_RUNNING;
- rdtscll(info->yield_start_cycle);
-
+ v3_start_time(info);
while (1) {
if (v3_svm_enter(info) == -1) {
return -1;
}
break;
-
-
-
+ case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+ PrintDebug("RDTSC/RDTSCP\n");
+#endif
+ if (v3_handle_rdtsc(info) == -1) {
+ PrintError("Error Handling RDTSC instruction\n");
+ return -1;
+ }
+ break;
+ case VMEXIT_RDTSCP:
+#ifdef CONFIG_DEBUG_TIME
+ PrintDebug("RDTSCP\n");
+#endif
+ if (v3_handle_rdtscp(info) == -1) {
+ PrintError("Error Handling RDTSCP instruction\n");
+ return -1;
+ }
+ break;
/* Exits Following this line are NOT HANDLED */
#include <palacios/vmcb.h>
static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_data) {
v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-
+ int cpu_valid = 0;
+
v3_print_guest_state(core);
-
// init SVM/VMX
#ifdef CONFIG_SVM
if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
+ cpu_valid = 1;
PrintDebugVMCB((vmcb_t *)(core->vmm_data));
}
#endif
#ifdef CONFIG_VMX
- else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+ if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+ cpu_valid = 1;
v3_print_vmcs();
}
#endif
- else {
- PrintError("Invalid CPU Type\n");
+ if (!cpu_valid) {
+ PrintError("Invalid CPU Type 0x%x\n", cpu_type);
return -1;
}
int v3_init_vm(struct v3_vm_info * vm) {
v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-
+ int cpu_valid = 0;
if (v3_get_foreground_vm() == NULL) {
v3_set_foreground_vm(vm);
if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
v3_init_svm_io_map(vm);
v3_init_svm_msr_map(vm);
- }
+ cpu_valid = 1;
+ }
#endif
#ifdef CONFIG_VMX
- else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+ if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
v3_init_vmx_io_map(vm);
v3_init_vmx_msr_map(vm);
+ cpu_valid = 1;
}
#endif
- else {
- PrintError("Invalid CPU Type\n");
+ if (!cpu_valid) {
+ PrintError("Invalid CPU Type 0x%x\n", cpu_type);
return -1;
}
-
-
v3_register_hypercall(vm, GUEST_INFO_HCALL, info_hcall, NULL);
-
V3_Print("GUEST_INFO_HCALL=%x\n", GUEST_INFO_HCALL);
return 0;
int v3_init_core(struct guest_info * core) {
v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
+ int cpu_valid = 0;
struct v3_vm_info * vm = core->vm_info;
/*
PrintError("Error in SVM initialization\n");
return -1;
}
+ cpu_valid = 1;
}
#endif
#ifdef CONFIG_VMX
- else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+ if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) {
PrintError("Error in VMX initialization\n");
return -1;
}
+ cpu_valid = 1;
}
#endif
- else {
- PrintError("Invalid CPU Type\n");
+ if (!cpu_valid) {
+ PrintError("Invalid CPU Type 0x%x\n", cpu_type);
return -1;
}
void v3_yield_cond(struct guest_info * info) {
uint64_t cur_cycle;
- rdtscll(cur_cycle);
+ cur_cycle = v3_get_host_time(&info->time_state);
if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
(void *)cur_cycle, (void *)info->yield_start_cycle, (void *)info->yield_cycle_period);
*/
V3_Yield();
- rdtscll(info->yield_start_cycle);
+ info->yield_start_cycle = v3_get_host_time(&info->time_state);
}
}
V3_Yield();
if (info) {
- rdtscll(info->yield_start_cycle);
+ info->yield_start_cycle = v3_get_host_time(&info->time_state);
}
}
info->cpu_id = i;
info->vm_info = vm;
+ info->core_cfg_data = per_core_cfg;
if (pre_config_core(info, per_core_cfg) == -1) {
PrintError("Error in core %d preconfiguration\n", i);
if (info->cpl != 0) {
v3_raise_exception(info, GPF_EXCEPTION);
} else {
- uint64_t yield_start = 0;
-
PrintDebug("CPU Yield\n");
while (!v3_intr_pending(info)) {
- rdtscll(yield_start);
+ /* Since we're in an exit, time is already paused here, so no need to pause again. */
v3_yield(info);
-
- v3_update_time(info, yield_start - info->time_state.cached_host_tsc);
-
- rdtscll(info->time_state.cached_host_tsc);
+ v3_update_timers(info);
/* At this point, we either have some combination of
interrupts, including perhaps a timer interrupt, or
void v3_init_msr_map(struct v3_vm_info * vm) {
struct v3_msr_map * msr_map = &(vm->msr_map);
+ PrintDebug("Initializing MSR map.\n");
+
INIT_LIST_HEAD(&(msr_map->hook_list));
msr_map->num_hooks = 0;
* All rights reserved.
*
* Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Patrick G. Bridges <bridges@cs.unm.edu>
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
#include <palacios/vmm.h>
#include <palacios/vm_guest.h>
+#ifndef CONFIG_DEBUG_TIME
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+/* Overview
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest
+ * resolution, lowest overhead timer on modern CPUs that it can - the
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a
+ * constant rate TSC, and Palacios relies on this fact.
+ *
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ * time in the guest. This is computed using an offsets from (1) above.
+ * (3) The actual guest timestamp counter (which can be written by
+ * writing to the guest TSC MSR - MSR 0x10). This is computed as an
+ * offset from (2) above, and that offset is updated when the guest
+ * writes the TSC MSR.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of all
+ * of these offsets (2 and 3) above
+ *
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest,
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ * be to support slack simulation of guests. The idea is that simulators
+ * set this skew to be the difference between how much time passed for a
+ * simulated feature and a real implementation of that feature, making
+ * guest time pass at a different rate from real time on this core. The VMM will then
+ * attempt to move this skew back towards 0 subject to resolution/accuracy
+ * constraints from various system timers.
+ *
+ * The main effort in doing this will be to get accuracy/resolution
+ * information from each local timer and to use this to bound how much skew
+ * is removed on each exit.
+ */
+
+
static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) {
struct vm_time * time_state = &(info->time_state);
- info->vm_regs.rbx = time_state->cpu_freq;
+ info->vm_regs.rbx = time_state->guest_cpu_freq;
PrintDebug("Guest request cpu frequency: return %ld\n", (long)info->vm_regs.rbx);
-void v3_init_time(struct guest_info * info) {
+int v3_start_time(struct guest_info * info) {
+ /* We start running with guest_time == host_time */
+ uint64_t t = v3_get_host_time(&info->time_state);
+
+ PrintDebug("Starting initial guest time as %llu\n", t);
+ info->time_state.last_update = t;
+ info->time_state.initial_time = t;
+ info->yield_start_cycle = t;
+ return 0;
+}
+
+// If the guest is supposed to run slower than the host, yield out until
+// the host time is appropriately far along.
+int v3_adjust_time(struct guest_info * info) {
struct vm_time * time_state = &(info->time_state);
- time_state->cpu_freq = V3_CPU_KHZ();
-
- time_state->guest_tsc = 0;
- time_state->cached_host_tsc = 0;
- // time_state->pending_cycles = 0;
-
- INIT_LIST_HEAD(&(time_state->timers));
- time_state->num_timers = 0;
+ if (time_state->host_cpu_freq == time_state->guest_cpu_freq) {
+ time_state->guest_host_offset = 0;
+ } else {
+ uint64_t guest_time, guest_elapsed, desired_elapsed;
+ uint64_t host_time, target_host_time;
- v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
-}
+ guest_time = v3_get_guest_time(time_state);
+ /* Compute what host time this guest time should correspond to. */
+ guest_elapsed = (guest_time - time_state->initial_time);
+ desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq;
+ target_host_time = time_state->initial_time + desired_elapsed;
-int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data) {
+ /* Yield until that host time is reached */
+ host_time = v3_get_host_time(time_state);
+ while (host_time < target_host_time) {
+ v3_yield(info);
+ host_time = v3_get_host_time(time_state);
+ }
+
+ time_state->guest_host_offset = (sint64_t)guest_time - (sint64_t)host_time;
+ }
+ return 0;
+}
+
+int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops,
+ void * private_data) {
struct vm_timer * timer = NULL;
timer = (struct vm_timer *)V3_Malloc(sizeof(struct vm_timer));
V3_ASSERT(timer != NULL);
return 0;
}
-
int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) {
list_del(&(timer->timer_link));
info->time_state.num_timers--;
return 0;
}
+void v3_update_timers(struct guest_info * info) {
+ struct vm_timer * tmp_timer;
+ uint64_t old_time = info->time_state.last_update;
+ uint64_t cycles;
+ info->time_state.last_update = v3_get_guest_time(&info->time_state);
+ cycles = info->time_state.last_update - old_time;
-void v3_update_time(struct guest_info * info, uint64_t cycles) {
- struct vm_timer * tmp_timer;
+ list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
+ tmp_timer->ops->update_timer(info, cycles, info->time_state.guest_cpu_freq, tmp_timer->private_data);
+ }
+}
+
+/*
+ * Handle full virtualization of the time stamp counter. As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from monotonic guest's time. If the guest writes to the TSC, we
+ * handle this by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */
+
+int v3_rdtsc(struct guest_info * info) {
+ uint64_t tscval = v3_get_guest_tsc(&info->time_state);
+ info->vm_regs.rdx = tscval >> 32;
+ info->vm_regs.rax = tscval & 0xffffffffLL;
+ return 0;
+}
+
+int v3_handle_rdtsc(struct guest_info * info) {
+ v3_rdtsc(info);
- // cycles *= 8;
+ info->vm_regs.rax &= 0x00000000ffffffffLL;
+ info->vm_regs.rdx &= 0x00000000ffffffffLL;
-// cycles /= 150;
+ info->rip += 2;
+
+ return 0;
+}
- info->time_state.guest_tsc += cycles;
+int v3_rdtscp(struct guest_info * info) {
+ int ret;
+ /* First get the MSR value that we need. It's safe to futz with
+ * ra/c/dx here since they're modified by this instruction anyway. */
+ info->vm_regs.rcx = TSC_AUX_MSR;
+ ret = v3_handle_msr_read(info);
+ if (ret) return ret;
+ info->vm_regs.rcx = info->vm_regs.rax;
- list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
- tmp_timer->ops->update_time(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data);
- }
-
+ /* Now do the TSC half of the instruction */
+ ret = v3_rdtsc(info);
+ if (ret) return ret;
+
+ return 0;
+}
+
+
+int v3_handle_rdtscp(struct guest_info * info) {
+
+ v3_rdtscp(info);
+
+ info->vm_regs.rax &= 0x00000000ffffffffLL;
+ info->vm_regs.rcx &= 0x00000000ffffffffLL;
+ info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+ info->rip += 3;
+
+ return 0;
+}
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num,
+ struct v3_msr *msr_val, void *priv) {
+ struct vm_time * time_state = &(info->time_state);
+
+ V3_ASSERT(msr_num == TSC_AUX_MSR);
+ msr_val->lo = time_state->tsc_aux.lo;
+ msr_val->hi = time_state->tsc_aux.hi;
- //info->time_state.pending_cycles = 0;
+ return 0;
+}
+
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num,
+ struct v3_msr msr_val, void *priv) {
+ struct vm_time * time_state = &(info->time_state);
+
+ V3_ASSERT(msr_num == TSC_AUX_MSR);
+ time_state->tsc_aux.lo = msr_val.lo;
+ time_state->tsc_aux.hi = msr_val.hi;
+
+ return 0;
+}
+
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+ struct v3_msr *msr_val, void *priv) {
+ uint64_t time = v3_get_guest_tsc(&info->time_state);
+
+ V3_ASSERT(msr_num == TSC_MSR);
+ msr_val->hi = time >> 32;
+ msr_val->lo = time & 0xffffffffLL;
+
+ return 0;
+}
+
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+ struct v3_msr msr_val, void *priv) {
+ struct vm_time * time_state = &(info->time_state);
+ uint64_t guest_time, new_tsc;
+ V3_ASSERT(msr_num == TSC_MSR);
+ new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+ guest_time = v3_get_guest_time(time_state);
+ time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time;
+
+ return 0;
}
-void v3_advance_time(struct guest_info * core) {
- struct vm_timer * tmp_timer;
+static int init_vm_time(struct v3_vm_info *vm_info) {
+ int ret;
- list_for_each_entry(tmp_timer, &(core->time_state.timers), timer_link) {
- tmp_timer->ops->advance_timer(core, tmp_timer->private_data);
+ PrintDebug("Installing TSC MSR hook.\n");
+ ret = v3_hook_msr(vm_info, TSC_MSR,
+ tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+
+ PrintDebug("Installing TSC_AUX MSR hook.\n");
+ if (ret) return ret;
+ ret = v3_hook_msr(vm_info, TSC_AUX_MSR, tsc_aux_msr_read_hook,
+ tsc_aux_msr_write_hook, NULL);
+ if (ret) return ret;
+
+ PrintDebug("Registering TIME_CPUFREQ hypercall.\n");
+ ret = v3_register_hypercall(vm_info, TIME_CPUFREQ_HCALL,
+ handle_cpufreq_hcall, NULL);
+ return ret;
+}
+
+void v3_init_time(struct guest_info * info) {
+ struct vm_time * time_state = &(info->time_state);
+ v3_cfg_tree_t * cfg_tree = info->core_cfg_data;
+ static int one_time = 0;
+ char *khz;
+
+ time_state->host_cpu_freq = V3_CPU_KHZ();
+ khz = v3_cfg_val(cfg_tree, "khz");
+ if (khz) {
+ time_state->guest_cpu_freq = atoi(khz);
+ PrintDebug("Core %d CPU frequency requested at %d khz.\n",
+ info->cpu_id, time_state->guest_cpu_freq);
+ }
+
+ if (!khz || time_state->guest_cpu_freq > time_state->host_cpu_freq) {
+ time_state->guest_cpu_freq = time_state->host_cpu_freq;
}
-
+ PrintDebug("Core %d CPU frequency set to %d KHz (host CPU frequency = %d KHz).\n", info->cpu_id, time_state->guest_cpu_freq, time_state->host_cpu_freq);
+ time_state->initial_time = 0;
+ time_state->last_update = 0;
+ time_state->guest_host_offset = 0;
+ time_state->tsc_guest_offset = 0;
+ INIT_LIST_HEAD(&(time_state->timers));
+ time_state->num_timers = 0;
+
+ time_state->tsc_aux.lo = 0;
+ time_state->tsc_aux.hi = 0;
+
+ if (!one_time) {
+ init_vm_time(info->vm_info);
+ one_time = 1;
+ }
}
+
+
+
+
+
+
+
+
+
vmx_state->pri_proc_ctrls.invlpg_exit = 1;
vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
vmx_state->pri_proc_ctrls.pause_exit = 1;
+ vmx_state->pri_proc_ctrls.tsc_offset = 1;
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+ vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
+#endif
vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
*/
int v3_vmx_enter(struct guest_info * info) {
int ret = 0;
- uint64_t tmp_tsc = 0;
+ uint32_t tsc_offset_low, tsc_offset_high;
struct vmx_exit_info exit_info;
// Conditionally yield the CPU if the timeslice has expired
v3_yield_cond(info);
-
// v3_print_guest_state(info);
// disable global interrupts for vm state transition
vmcs_write(VMCS_GUEST_CR3, guest_cr3);
}
- // We do timer injection here to track real host time.
- rdtscll(tmp_tsc);
- v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
- rdtscll(info->time_state.cached_host_tsc);
+ v3_update_timers(info);
+
+ /* If this guest is frequency-lagged behind host time, wait
+ * for the appropriate host time before resuming the guest. */
+ v3_adjust_time(info);
+
+ tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
+ tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
+ check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+ check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
if (info->vm_info->run_state == VM_STOPPED) {
info->vm_info->run_state = VM_RUNNING;
return -1;
}
- // rdtscll(tmp_tsc);
- // v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
info->num_exits++;
-
/* Update guest state */
v3_vmx_save_vmcs(info);
int v3_start_vmx_guest(struct guest_info* info) {
+ PrintDebug("Starting VMX core %u\n",info->cpu_id);
+ if (info->cpu_mode==INIT) {
+ PrintDebug("VMX core %u: I am an AP in INIT mode, waiting for that to change\n",info->cpu_id);
+ while (info->cpu_mode==INIT) {
+ v3_yield(info);
+ //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
+ }
+ PrintDebug("VMX core %u: I am out of INIT\n",info->cpu_id);
+ if (info->cpu_mode==SIPI) {
+ PrintDebug("VMX core %u: I am waiting on a SIPI to set my starting address\n",info->cpu_id);
+ while (info->cpu_mode==SIPI) {
+ v3_yield(info);
+ //PrintDebug("VMX core %u: still waiting for SIPI\n",info->cpu_id);
+ }
+ }
+ PrintDebug("VMX core %u: I have my SIPI\n", info->cpu_id);
+ }
+
+ if (info->cpu_mode!=REAL) {
+ PrintError("VMX core %u: I am not in REAL mode at launch! Huh?!\n", info->cpu_id);
+ return -1;
+ }
+
+ PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
+ info->cpu_id, info->segments.cs.selector, (void*)(info->segments.cs.base),
+ info->segments.cs.limit,(void*)(info->rip));
+
- PrintDebug("Launching VMX guest\n");
- rdtscll(info->time_state.cached_host_tsc);
+ PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
+ v3_start_time(info);
while (1) {
if (v3_vmx_enter(info) == -1) {
}
break;
+
+ case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+ PrintDebug("RDTSC\n");
+#endif
+ if (v3_handle_rdtsc(info) == -1) {
+ PrintError("Error Handling RDTSC instruction\n");
+ return -1;
+ }
+
+ break;
+
case VMEXIT_CPUID:
if (v3_handle_cpuid(info) == -1) {
PrintError("Error Handling CPUID instruction\n");