#include <palacios/vmm_types.h>
#include <palacios/vmm_list.h>
+#include <palacios/vmm_msr.h>
#include <palacios/vmm_util.h>
struct guest_info;
struct vm_time {
- uint32_t cpu_freq; // in kHZ
-
- uint64_t last_update; // Last time (in guest time) the timers were updated
- uint64_t pause_time; // Cache value to help calculate the guest_tsc
- sint64_t host_offset; // Offset of guest time from host time.
- sint64_t offset_sum; // Sum of past and current host_offsets
-
- // Installed Timers
+ uint32_t cpu_freq; // in kHz, in terms of guest CPU speed,
+ // which ideally can be different from (lower than)
+ // the host CPU speed
+
+ uint32_t time_mult; // Multiplier and divisor fields for computing
+ uint32_t time_div; // monotonic guest time from host (TSC) time
+ sint64_t time_offset; // Offset added to host time to give monotonic guest time
+
+ sint64_t tsc_time_offset; // Offset for computing guest TSC value from
+ // monotonic guest time
+
+ uint64_t last_update; // Last time (in monotonic guest time) the
+ // timers were updated
+
+ uint64_t pause_time; // Cached guest time; while nonzero it is returned as the monotonic guest time
+
+ struct v3_msr tsc_aux; // Auxiliary MSR (TSC_AUX_MSR) value for RDTSCP
+
+ // Installed Timers slaved off of the guest monotonic TSC
uint_t num_timers;
struct list_head timers;
};
int v3_pause_time(struct guest_info * info);
int v3_resume_time(struct guest_info * info);
+// Returns host time as read by rdtscll(); the vm_time argument is not used.
static inline uint64_t v3_get_host_time(struct vm_time *t) {
uint64_t tmp;
rdtscll(tmp);
return tmp;
}
+// Returns *monotonic* guest time: the cached pause_time while it is nonzero, otherwise host time plus time_offset.
static inline uint64_t v3_get_guest_time(struct vm_time *t) {
if (t->pause_time) return t->pause_time;
- else return v3_get_host_time(t) + t->host_offset;
+ else return v3_get_host_time(t) + t->time_offset;
+}
+
+// Returns the TSC value seen by the guest: monotonic guest time plus tsc_time_offset.
+static inline uint64_t v3_get_guest_tsc(struct vm_time *t) {
+ return v3_get_guest_time(t) + t->tsc_time_offset;
}
+
+#define TSC_MSR 0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
+
#endif // !__V3VEE__
#endif
#define PrintDebug(fmt, args...)
#endif
+/* Overview
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest
+ * resolution, lowest overhead timer on modern CPUs that it can - the
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a
+ * constant rate TSC, and Palacios relies on this fact.
+ *
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ * time in the guest. This is computed as a multiplier/offset from (1) above
+ * (3) The actual guest timestamp counter, which the guest can set by
+ * writing to the guest TSC MSR (MSR 0x10). This is computed as an
+ * offset from the monotonic guest TSC in (2) above, and this offset
+ * is updated when the TSC MSR is written.
+ *
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest,
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ *
+ *
+ */
+
+
static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) {
struct vm_time * time_state = &(info->time_state);
-void v3_init_time(struct guest_info * info) {
- struct vm_time * time_state = &(info->time_state);
-
- time_state->cpu_freq = V3_CPU_KHZ();
-
- time_state->pause_time = 0;
- time_state->last_update = 0;
- time_state->host_offset = 0;
- time_state->offset_sum = 0;
-
- INIT_LIST_HEAD(&(time_state->timers));
- time_state->num_timers = 0;
-
- v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
-}
-
int v3_start_time(struct guest_info * info) {
/* We start running with guest_time == host_time */
uint64_t t = v3_get_host_time(&info->time_state);
return 0;
}
-/* Use a control-theoretic approach, specifically a PI control approach,
- * to adjust host_offset towards 0. Overall control documentation in
- * palacios/docs/time_control.tex Control parameters are P and I,
- * broken into rational numbers
- */
-
-/* These numbers need to be actually determined by pole placement work. They're
- * just blind guesses for now, which is a really bad idea. :) */
-#define P_NUM 1
-#define P_DENOM 2
-#define I_NUM 1
-#define I_DENOM 20
-
-void adjust_time_offset(struct guest_info * info) {
- /* Set point for control: Desired offset = 0;
- * Error = host_offset - 0 = host_offset */
-
- sint64_t adjust;
-
- /* Update the integral of the errror */
- info->time_state.offset_sum += info->time_state.host_offset;
- adjust = (P_NUM * info->time_state.host_offset) / P_DENOM +
- (I_NUM * info->time_state.offset_sum) / I_DENOM;
-
- /* We may want to constrain *adjust* because of
- * resolution/accuracy constraints. Explore that later. */
- info->time_state.host_offset -= adjust;
- return;
-}
-
int v3_resume_time(struct guest_info * info) {
uint64_t t = v3_get_host_time(&info->time_state);
V3_ASSERT(info->time_state.pause_time != 0);
- info->time_state.host_offset =
+ info->time_state.time_offset = // pause_time minus current host time, so host time + time_offset continues from pause_time
(sint64_t)info->time_state.pause_time - (sint64_t)t;
-#ifdef CONFIG_TIME_TSC_OFFSET_ADJUST
- adjust_time_offset(info);
-#endif
info->time_state.pause_time = 0;
PrintDebug("Time resumed paused at guest time as %llu "
- "offset %lld from host time.\n", t, info->time_state.host_offset);
+ "offset %lld from host time.\n", t, info->time_state.time_offset); // NOTE(review): t is the host time read above, though the message labels it guest time - confirm intent
return 0;
}
tmp_timer->ops->update_timer(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data);
}
}
+
+
+/*
+ * Handle full virtualization of the time stamp counter. As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from the host TSC. If the guest writes to the TSC, we handle
+ * this by changing that offset.
+ */
+
+/*
+ * Emulate the register effects of RDTSC: place the guest-visible TSC
+ * (v3_get_guest_tsc) in RDX:RAX, high 32 bits in RDX and low 32 bits in
+ * RAX. Does not touch the guest RIP. Always returns 0.
+ */
+int v3_rdtsc(struct guest_info * info) {
+    const uint64_t guest_tsc = v3_get_guest_tsc(&(info->time_state));
+
+    info->vm_regs.rax = guest_tsc & 0xffffffffLL;
+    info->vm_regs.rdx = guest_tsc >> 32;
+
+    return 0;
+}
+
+/*
+ * Exit handler for a guest RDTSC instruction.
+ *
+ * Loads the guest-visible TSC into RDX:RAX via v3_rdtsc(), truncates both
+ * registers to their low 32 bits, and advances the guest RIP past the
+ * instruction.
+ *
+ * Returns 0 on success. A non-zero status from v3_rdtsc() is passed back
+ * to the caller and this function then leaves RIP unmodified, matching
+ * the way v3_rdtscp() checks the same call.
+ */
+int v3_handle_rdtsc(struct guest_info * info) {
+    int ret = v3_rdtsc(info);
+
+    if (ret != 0) {
+        return ret;
+    }
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    /* RDTSC is a two-byte instruction (opcode 0F 31) */
+    info->rip += 2;
+
+    return 0;
+}
+
+/*
+ * Emulate the register effects of RDTSCP: RCX receives the value that the
+ * MSR read path returns in RAX for TSC_AUX_MSR, and RDX:RAX receive the
+ * guest-visible TSC via v3_rdtsc().
+ *
+ * Returns 0 on success, or the non-zero status of whichever step failed.
+ */
+int v3_rdtscp(struct guest_info * info) {
+    int status;
+
+    /* Fetch TSC_AUX through the regular MSR read path. Clobbering
+     * RAX/RCX/RDX is harmless because the instruction overwrites all
+     * three anyway.
+     * NOTE(review): if v3_handle_msr_read() advances info->rip itself,
+     * RIP would be advanced a second time by the RDTSCP exit handler -
+     * confirm against its implementation. */
+    info->vm_regs.rcx = TSC_AUX_MSR;
+    status = v3_handle_msr_read(info);
+
+    if (status != 0) {
+        return status;
+    }
+
+    info->vm_regs.rcx = info->vm_regs.rax;
+
+    /* TSC half of the instruction; its status is the overall result */
+    return v3_rdtsc(info);
+}
+
+
+/*
+ * Exit handler for a guest RDTSCP instruction.
+ *
+ * Emulates the instruction via v3_rdtscp(), truncates RAX, RCX and RDX to
+ * their low 32 bits, and advances the guest RIP past the instruction.
+ *
+ * Returns 0 on success. If v3_rdtscp() fails (for example because the
+ * TSC_AUX_MSR read fails), its status is returned and this function does
+ * not advance RIP, so the failure is not reported as a completed
+ * instruction.
+ */
+int v3_handle_rdtscp(struct guest_info * info) {
+    int ret = v3_rdtscp(info);
+
+    if (ret != 0) {
+        return ret;
+    }
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    /* RDTSCP is a three-byte instruction (opcode 0F 01 F9) */
+    info->rip += 3;
+
+    return 0;
+}
+
+/*
+ * MSR read hook for TSC_AUX_MSR: copies the tsc_aux value kept in
+ * info->time_state into *msr_val. priv is unused. Always returns 0.
+ */
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                                 struct v3_msr *msr_val, void *priv) {
+    const struct v3_msr * saved_aux = &(info->time_state.tsc_aux);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    msr_val->hi = saved_aux->hi;
+    msr_val->lo = saved_aux->lo;
+
+    return 0;
+}
+
+/*
+ * MSR write hook for TSC_AUX_MSR: stores the written value in the tsc_aux
+ * field of info->time_state so that later reads return it. priv is unused.
+ * Always returns 0.
+ */
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                                  struct v3_msr msr_val, void *priv) {
+    struct v3_msr * saved_aux = &(info->time_state.tsc_aux);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    saved_aux->hi = msr_val.hi;
+    saved_aux->lo = msr_val.lo;
+
+    return 0;
+}
+
+/*
+ * MSR read hook for TSC_MSR: reports the guest-visible TSC
+ * (v3_get_guest_tsc), split into the high and low 32-bit halves of
+ * *msr_val. priv is unused. Always returns 0.
+ */
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                             struct v3_msr *msr_val, void *priv) {
+    const uint64_t guest_tsc = v3_get_guest_tsc(&(info->time_state));
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    msr_val->lo = guest_tsc & 0xffffffffLL;
+    msr_val->hi = guest_tsc >> 32;
+
+    return 0;
+}
+
+/*
+ * MSR write hook for TSC_MSR.
+ *
+ * The guest TSC is not stored directly. Instead, tsc_time_offset is
+ * recomputed so that monotonic guest time plus tsc_time_offset equals the
+ * value the guest just wrote. priv is unused. Always returns 0.
+ */
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                              struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+
+    /* new_tsc is guest-controlled, so subtracting the two values as
+     * signed 64-bit integers could overflow, which is undefined
+     * behaviour. Subtract in unsigned arithmetic (wraps modulo 2^64)
+     * and convert the difference afterwards; v3_get_guest_tsc() adds the
+     * offset back modulo 2^64, so the guest still reads what it wrote. */
+    time_state->tsc_time_offset = (sint64_t)(new_tsc - guest_time);
+
+    return 0;
+}
+
+
+/*
+ * Initialise the time state embedded in info and install the time-related
+ * hooks: MSR hooks for TSC_MSR and TSC_AUX_MSR, and the TIME_CPUFREQ_HCALL
+ * hypercall. The hooks are registered on info->vm_info.
+ */
+void v3_init_time(struct guest_info * info) {
+    struct vm_time * ts = &(info->time_state);
+
+    ts->cpu_freq = V3_CPU_KHZ();
+
+    /* Start with multiplier and divisor of 1 and all offsets at zero */
+    ts->time_mult = 1;
+    ts->time_div = 1;
+    ts->time_offset = 0;
+    ts->tsc_time_offset = 0;
+
+    ts->last_update = 0;
+    ts->pause_time = 0;
+
+    ts->tsc_aux.hi = 0;
+    ts->tsc_aux.lo = 0;
+
+    /* No timers installed yet */
+    ts->num_timers = 0;
+    INIT_LIST_HEAD(&(ts->timers));
+
+    /* NOTE(review): open question carried over from the original author -
+     * is this function called once, or once *per core*? The hooks below
+     * are registered on the shared vm_info. */
+    v3_hook_msr(info->vm_info, TSC_MSR,
+                tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+    v3_hook_msr(info->vm_info, TSC_AUX_MSR,
+                tsc_aux_msr_read_hook, tsc_aux_msr_write_hook, NULL);
+
+    v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
+}