From: Jack Lange <jarusl@cs.northwestern.edu>
Date: Mon, 25 Oct 2010 22:38:09 +0000 (-0500)
Subject: Merge branch 'devel' of ssh://palacios@newskysaw/home/palacios/palacios into devel
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=964e7836a6227341b5d895a830b7e36ad6debffb;hp=4731ff7dc97e42853546b38b4d441d793e7a4ec8;p=palacios-OLD.git

Merge branch 'devel' of ssh://palacios@newskysaw/home/palacios/palacios into devel
---

diff --git a/Kconfig b/Kconfig
index 9d3cbb9..0910a5f 100644
--- a/Kconfig
+++ b/Kconfig
@@ -188,7 +188,6 @@ endmenu
 source "Kconfig.stdlibs"
 
 
-
 menu "Virtual Paging"
 
 config SHADOW_PAGING
@@ -224,6 +223,18 @@ config SHADOW_PAGING_CACHE1
 endmenu
 
 
+menu "Time Management"
+
+config TIME_VIRTUALIZE_TSC
+	bool "Virtualize guest TSC"
+	default n
+	help
+	    Virtualize the processor time stamp counter in the guest, 
+	    generally increasing consistency between various time sources 
+	    but also potentially making guest time run slower than real time.
+
+endmenu
+
 menu "Symbiotic Functions"
 
 config SYMBIOTIC
@@ -331,7 +342,12 @@ config DEBUG_INTERRUPTS
 	help 
 	  This turns on debugging for the interrupt system
 
-
+config DEBUG_TIME
+	bool "Timing"
+	default n
+	depends on DEBUG_ON
+	help
+	  This turns on debugging of system time virtualization
 
 config DEBUG_IO
 	bool "IO"
diff --git a/Makefile b/Makefile
index bb5c155..9d77f82 100644
--- a/Makefile
+++ b/Makefile
@@ -818,7 +818,7 @@ endif
 ALLSOURCE_ARCHS := $(ARCH)
 
 define all-sources
-	( find $(__srctree)/palacios $(RCS_FIND_IGNORE) \
+	( find $(__srctree)palacios $(RCS_FIND_IGNORE) \
 	       \( -name lib \) -prune -o \
 	       -name '*.[chS]' -print; )
 endef
diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h
index 7d82660..04a5632 100644
--- a/palacios/include/palacios/vm_guest.h
+++ b/palacios/include/palacios/vm_guest.h
@@ -114,6 +114,8 @@ struct guest_info {
     struct v3_sym_core_state sym_core_state;
 #endif
 
+    /* Per-core config tree data. */
+    v3_cfg_tree_t * core_cfg_data;
 
     struct v3_vm_info * vm_info;
 
diff --git a/palacios/include/palacios/vmm_time.h b/palacios/include/palacios/vmm_time.h
index 120838d..74c7584 100644
--- a/palacios/include/palacios/vmm_time.h
+++ b/palacios/include/palacios/vmm_time.h
@@ -24,31 +24,32 @@
 
 #include <palacios/vmm_types.h>
 #include <palacios/vmm_list.h>
+#include <palacios/vmm_msr.h>
+#include <palacios/vmm_util.h>
 
 struct guest_info;
 
 struct vm_time {
-    uint32_t cpu_freq; // in kHZ
-
-    // Total number of guest run time cycles
-    uint64_t guest_tsc;
-
-    // Cache value to help calculate the guest_tsc
-    uint64_t cached_host_tsc;
-
-    // The number of cycles pending for notification to the timers
-    //ullong_t pending_cycles;
-
-    // Installed Timers 
+    uint32_t host_cpu_freq;    // Host CPU frequency, in kHz
+    uint32_t guest_cpu_freq;   // Guest CPU frequency, in kHz; can be lower than host CPU freq!
+         
+    sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+    sint64_t tsc_guest_offset; // Offset of guest TSC from monotonic guest time
+    
+    uint64_t last_update;      // Last time (in monotonic guest time) the 
+                               // timers were updated
+
+    uint64_t initial_time;     // Host time recorded by v3_start_time() for this core
+    
+    struct v3_msr tsc_aux;     // Auxiliary MSR (TSC_AUX) value used for RDTSCP
+
+    // Installed Timers slaved off of the guest monotonic TSC
     uint_t num_timers;
     struct list_head timers;
 };
 
-
-
-
 struct vm_timer_ops {
-    void (*update_time)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
+    void (*update_timer)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
     void (*advance_timer)(struct guest_info * info, void * private_data);
 };
 
@@ -59,18 +60,48 @@ struct vm_timer {
     struct list_head timer_link;
 };
 
+// Basic functions for handling passage of time in palacios
+void v3_init_time(struct guest_info * info);
+int v3_start_time(struct guest_info * info);
+int v3_adjust_time(struct guest_info * info);
 
-
-
+// Basic functions for attaching timers to the passage of time
 int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data);
 int v3_remove_timer(struct guest_info * info, struct vm_timer * timer);
+void v3_update_timers(struct guest_info * info);
 
-void v3_advance_time(struct guest_info * info);
+// Functions to return the different notions of time in Palacios. Host time is read via rdtscll(); t is unused.
+static inline uint64_t v3_get_host_time(struct vm_time *t) {
+    uint64_t tmp;
+    rdtscll(tmp);
+    return tmp;
+}
+
+// Returns *monotonic* guest time: host time plus the signed guest_host_offset.
+static inline uint64_t v3_get_guest_time(struct vm_time *t) {
+    return v3_get_host_time(t) + t->guest_host_offset;
+}
+
+// Returns the TSC value seen by the guest: monotonic guest time plus tsc_guest_offset.
+static inline uint64_t v3_get_guest_tsc(struct vm_time *t) {
+    return v3_get_guest_time(t) + t->tsc_guest_offset;
+}
+
+// Returns offset of guest TSC from host TSC (guest_host_offset + tsc_guest_offset).
+static inline sint64_t v3_tsc_host_offset(struct vm_time *time_state) {
+    return time_state->guest_host_offset + time_state->tsc_guest_offset;
+}
+
+// Functions for handling exits on the TSC when fully virtualizing 
+// the timestamp counter.
+#define TSC_MSR     0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
 
-void v3_update_time(struct guest_info * info, ullong_t cycles);
 
 
-void v3_init_time(struct guest_info * info);
 
 #endif // !__V3VEE__
 
diff --git a/palacios/src/devices/8254.c b/palacios/src/devices/8254.c
index 51c3f04..ecb2fa0 100644
--- a/palacios/src/devices/8254.c
+++ b/palacios/src/devices/8254.c
@@ -236,7 +236,7 @@ static int handle_crystal_tics(struct vm_device * dev, struct channel * ch, uint
 
 #include <palacios/vm_guest.h>
 
-static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
+static void pit_update_timer(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
     struct vm_device * dev = (struct vm_device *)private_data;
     struct pit * state = (struct pit *)dev->private_data;
     //  ullong_t tmp_ctr = state->pit_counter;
@@ -313,14 +313,6 @@ static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullon
     return;
 }
 
-
-static void pit_advance_time(struct guest_info * core, void * private_data) {
-
-    v3_raise_irq(core->vm_info, 0);
-}
-
-
-
 /* This should call out to handle_SQR_WAVE_write, etc...
  */
 static int handle_channel_write(struct channel * ch, char val) {
@@ -624,8 +616,7 @@ static int pit_write_command(struct guest_info * core, ushort_t port, void * src
 
 
 static struct vm_timer_ops timer_ops = {
-    .update_time = pit_update_time,
-    .advance_timer = pit_advance_time,
+    .update_timer = pit_update_timer,
 };
 
 
diff --git a/palacios/src/devices/apic.c b/palacios/src/devices/apic.c
index a844c6c..40b90a1 100644
--- a/palacios/src/devices/apic.c
+++ b/palacios/src/devices/apic.c
@@ -1100,7 +1100,7 @@ static struct intr_ctrl_ops intr_ops = {
 
 
 static struct vm_timer_ops timer_ops = {
-    .update_time = apic_update_time,
+    .update_timer = apic_update_time,
 };
 
 
diff --git a/palacios/src/devices/icc_bus.c b/palacios/src/devices/icc_bus.c
index 79f9159..e9fe95c 100644
--- a/palacios/src/devices/icc_bus.c
+++ b/palacios/src/devices/icc_bus.c
@@ -135,7 +135,7 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm
 
 	    // Sanity check
 	    if (core->cpu_mode != INIT) { 
-		PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id);
+		PrintError("icc_bus: Warning: core %u is not in INIT state (mode = %d), ignored\n",core->cpu_id, core->cpu_mode);
 		// Only a warning, since INIT INIT SIPI is common
 		break;
 	    }
@@ -159,7 +159,7 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm
 
 	    // Sanity check
 	    if (core->cpu_mode!=SIPI) { 
-		PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id);
+		PrintError("icc_bus: core %u is not in SIPI state (mode = %d), ignored!\n",core->cpu_id, core->cpu_mode);
 		break;
 	    }
 
@@ -204,8 +204,6 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm
 
 int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, uint32_t extirq) {
 
-    PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq);
-
     struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
 
     struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c
index 7b33c8c..35f2047 100644
--- a/palacios/src/palacios/svm.c
+++ b/palacios/src/palacios/svm.c
@@ -80,8 +80,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
 
     //
-
-
     ctrl_area->svm_instrs.VMRUN = 1;
     ctrl_area->svm_instrs.VMMCALL = 1;
     ctrl_area->svm_instrs.VMLOAD = 1;
@@ -99,6 +97,12 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->instrs.CPUID = 1;
 
     ctrl_area->instrs.HLT = 1;
+
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+    ctrl_area->instrs.rdtsc = 1;
+    ctrl_area->svm_instrs.rdtscp = 1;
+#endif
+
     // guest_state->cr0 = 0x00000001;    // PE 
   
     /*
@@ -422,7 +426,6 @@ static int update_irq_entry_state(struct guest_info * info) {
 int v3_svm_enter(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
-    ullong_t tmp_tsc;
     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
@@ -475,32 +478,26 @@ int v3_svm_enter(struct guest_info * info) {
     }
 #endif
 
+    v3_update_timers(info);
 
-    rdtscll(tmp_tsc);
-    v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc));
-    rdtscll(info->time_state.cached_host_tsc);
-    //    guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;
+    /* If this guest is frequency-lagged behind host time, wait 
+     * for the appropriate host time before resuming the guest. */
+    v3_adjust_time(info);
 
-    //V3_Print("Calling v3_svm_launch\n");
+    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
 
+    //V3_Print("Calling v3_svm_launch\n");
 
     v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);
-    
-    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
+    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
 
-    //rdtscll(tmp_tsc);
-    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
     //PrintDebug("SVM Returned\n");
     
     info->num_exits++;
 
-
-
-
     // Save Guest state from VMCB
     info->rip = guest_state->rip;
     info->vm_regs.rsp = guest_state->rsp;
@@ -596,8 +593,7 @@ int v3_start_svm_guest(struct guest_info *info) {
     //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
     
     info->vm_info->run_state = VM_RUNNING;
-    rdtscll(info->yield_start_cycle);
-
+    v3_start_time(info);
 
     while (1) {
 	if (v3_svm_enter(info) == -1) {
diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c
index d8b47c5..171d2b5 100644
--- a/palacios/src/palacios/svm_handler.c
+++ b/palacios/src/palacios/svm_handler.c
@@ -246,9 +246,24 @@ int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_i
 		return -1;
 	    }
 	    break;
-	
-
-
+        case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+	    PrintDebug("RDTSC/RDTSCP\n");
+#endif 
+	    if (v3_handle_rdtsc(info) == -1) {
+		PrintError("Error Handling RDTSC instruction\n");
+		return -1;
+	    }
+	    break;
+        case VMEXIT_RDTSCP:
+#ifdef CONFIG_DEBUG_TIME
+	    PrintDebug("RDTSCP\n");
+#endif 
+	    if (v3_handle_rdtscp(info) == -1) {
+		PrintError("Error Handling RDTSCP instruction\n");
+		return -1;
+	    }
+	    break;
 
 
 	    /* Exits Following this line are NOT HANDLED */
diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c
index 4dddf6e..fe22f0b 100644
--- a/palacios/src/palacios/vm_guest.c
+++ b/palacios/src/palacios/vm_guest.c
@@ -365,23 +365,25 @@ void v3_print_GPRs(struct guest_info * info) {
 #include <palacios/vmcb.h>
 static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_data) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-    
+    int cpu_valid = 0;
+
     v3_print_guest_state(core);
     
-
     // init SVM/VMX
 #ifdef CONFIG_SVM
     if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
+	cpu_valid = 1;
 	PrintDebugVMCB((vmcb_t *)(core->vmm_data));
     }
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+    if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+	cpu_valid = 1;
 	v3_print_vmcs();
     }
 #endif
-    else {
-	PrintError("Invalid CPU Type\n");
+    if (!cpu_valid) {
+	PrintError("Invalid CPU Type 0x%x\n", cpu_type);
 	return -1;
     }
     
@@ -405,7 +407,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat
 
 int v3_init_vm(struct v3_vm_info * vm) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-
+    int cpu_valid = 0;
 
     if (v3_get_foreground_vm() == NULL) {
 	v3_set_foreground_vm(vm);
@@ -449,24 +451,23 @@ int v3_init_vm(struct v3_vm_info * vm) {
     if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
 	v3_init_svm_io_map(vm);
 	v3_init_svm_msr_map(vm);
-    }
+	cpu_valid = 1;
+    } 
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+    if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
 	v3_init_vmx_io_map(vm);
 	v3_init_vmx_msr_map(vm);
+	cpu_valid = 1;
     }
 #endif
-    else {
-	PrintError("Invalid CPU Type\n");
+    if (!cpu_valid) {
+	PrintError("Invalid CPU Type 0x%x\n", cpu_type);
 	return -1;
     }
     
-
-
     v3_register_hypercall(vm, GUEST_INFO_HCALL, info_hcall, NULL);
 
-
     V3_Print("GUEST_INFO_HCALL=%x\n", GUEST_INFO_HCALL);
 
     return 0;
@@ -474,6 +475,7 @@ int v3_init_vm(struct v3_vm_info * vm) {
 
 int v3_init_core(struct guest_info * core) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
+    int cpu_valid = 0;
     struct v3_vm_info * vm = core->vm_info;
 
     /*
@@ -505,18 +507,20 @@ int v3_init_core(struct guest_info * core) {
 	    PrintError("Error in SVM initialization\n");
 	    return -1;
 	}
+	cpu_valid = 1;
     }
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+    if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
 	if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) {
 	    PrintError("Error in VMX initialization\n");
 	    return -1;
 	}
+	cpu_valid = 1;
     }
 #endif
-    else {
-	PrintError("Invalid CPU Type\n");
+    if (!cpu_valid) {
+	PrintError("Invalid CPU Type 0x%x\n", cpu_type);
 	return -1;
     }
 
diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c
index 49f30ba..e129d29 100644
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -292,7 +292,7 @@ v3_cpu_mode_t v3_get_host_cpu_mode() {
 
 void v3_yield_cond(struct guest_info * info) {
     uint64_t cur_cycle;
-    rdtscll(cur_cycle);
+    cur_cycle = v3_get_host_time(&info->time_state);
 
     if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
 
@@ -301,7 +301,7 @@ void v3_yield_cond(struct guest_info * info) {
 	  (void *)cur_cycle, (void *)info->yield_start_cycle, (void *)info->yield_cycle_period);
 	*/
 	V3_Yield();
-	rdtscll(info->yield_start_cycle);
+	info->yield_start_cycle = v3_get_host_time(&info->time_state);
     }
 }
 
@@ -315,7 +315,7 @@ void v3_yield(struct guest_info * info) {
     V3_Yield();
 
     if (info) {
-	rdtscll(info->yield_start_cycle);
+	info->yield_start_cycle = v3_get_host_time(&info->time_state);
     }
 }
 
diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c
index 738d2ee..ebad8b7 100644
--- a/palacios/src/palacios/vmm_config.c
+++ b/palacios/src/palacios/vmm_config.c
@@ -497,6 +497,7 @@ struct v3_vm_info * v3_config_guest(void * cfg_blob) {
 
 	info->cpu_id = i;
 	info->vm_info = vm;
+	info->core_cfg_data = per_core_cfg;
 
 	if (pre_config_core(info, per_core_cfg) == -1) {
 	    PrintError("Error in core %d preconfiguration\n", i);
diff --git a/palacios/src/palacios/vmm_halt.c b/palacios/src/palacios/vmm_halt.c
index 5015046..7970a40 100644
--- a/palacios/src/palacios/vmm_halt.c
+++ b/palacios/src/palacios/vmm_halt.c
@@ -38,17 +38,12 @@ int v3_handle_halt(struct guest_info * info) {
     if (info->cpl != 0) { 
 	v3_raise_exception(info, GPF_EXCEPTION);
     } else {
-	uint64_t yield_start = 0;
-	
 	PrintDebug("CPU Yield\n");
 
 	while (!v3_intr_pending(info)) {
-	    rdtscll(yield_start);
+	    /* Since we're in an exit, time is already paused here, so no need to pause again. */
 	    v3_yield(info);
-	    
-	    v3_update_time(info, yield_start - info->time_state.cached_host_tsc);
-	    
-	    rdtscll(info->time_state.cached_host_tsc);
+	    v3_update_timers(info);
 	    
 	    /* At this point, we either have some combination of 
 	       interrupts, including perhaps a timer interrupt, or 
diff --git a/palacios/src/palacios/vmm_msr.c b/palacios/src/palacios/vmm_msr.c
index 96d3ddc..66a14d4 100644
--- a/palacios/src/palacios/vmm_msr.c
+++ b/palacios/src/palacios/vmm_msr.c
@@ -26,6 +26,8 @@
 void v3_init_msr_map(struct v3_vm_info * vm) {
     struct v3_msr_map * msr_map  = &(vm->msr_map);
 
+    PrintDebug("Initializing MSR map.\n");
+
     INIT_LIST_HEAD(&(msr_map->hook_list));
     msr_map->num_hooks = 0;
 
diff --git a/palacios/src/palacios/vmm_time.c b/palacios/src/palacios/vmm_time.c
index b169669..44affe4 100644
--- a/palacios/src/palacios/vmm_time.c
+++ b/palacios/src/palacios/vmm_time.c
@@ -12,6 +12,7 @@
  * All rights reserved.
  *
  * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
@@ -21,10 +22,57 @@
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
+#ifndef CONFIG_DEBUG_TIME
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+/* Overview 
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest 
+ * resolution, lowest overhead timer on modern CPUs that it can - the 
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not 
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a 
+ * constant rate TSC, and Palacios relies on this fact.
+ * 
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ *     time in the guest. This is computed as an offset from (1) above.
+ * (3) The actual guest timestamp counter, which the guest can change by
+ *     writing to the guest TSC MSR (MSR 0x10). This is computed as an
+ *     offset from the monotonic guest TSC in (2) above, and that offset
+ *     is updated whenever the TSC MSR is written.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of
+ * the two offsets described in (2) and (3) above.
+ * 
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest, 
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ *     be to support slack simulation of guests. The idea is that simulators
+ *     set this skew to be the difference between how much time passed for a 
+ *     simulated feature and a real implementation of that feature, making
+ *     time pass at a different rate from real time on this core. The VMM will then
+ *     attempt to move this skew back towards 0 subject to resolution/accuracy
+ *     constraints from various system timers.
+ *   
+ *     The main effort in doing this will be to get accuracy/resolution 
+ *     information from each local timer and to use this to bound how much skew
+ *     is removed on each exit.
+ */
+
+
 static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) {
     struct vm_time * time_state = &(info->time_state);
 
-    info->vm_regs.rbx = time_state->cpu_freq;
+    info->vm_regs.rbx = time_state->guest_cpu_freq;
 
     PrintDebug("Guest request cpu frequency: return %ld\n", (long)info->vm_regs.rbx);
     
@@ -33,23 +81,49 @@ static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void
 
 
 
-void v3_init_time(struct guest_info * info) {
+int v3_start_time(struct guest_info * info) {
+    /* We start running with guest_time == host_time: one host-time sample seeds every field set below. */
+    uint64_t t = v3_get_host_time(&info->time_state); 
+
+    PrintDebug("Starting initial guest time as %llu\n", t);
+    info->time_state.last_update = t;
+    info->time_state.initial_time = t;
+    info->yield_start_cycle = t;
+    return 0; /* no failure path */
+}
+
+// If the guest is supposed to run slower than the host, yield out until
+// the host time is appropriately far along.
+int v3_adjust_time(struct guest_info * info) {
     struct vm_time * time_state = &(info->time_state);
 
-    time_state->cpu_freq = V3_CPU_KHZ();
- 
-    time_state->guest_tsc = 0;
-    time_state->cached_host_tsc = 0;
-    // time_state->pending_cycles = 0;
-  
-    INIT_LIST_HEAD(&(time_state->timers));
-    time_state->num_timers = 0;
+    if (time_state->host_cpu_freq == time_state->guest_cpu_freq) {
+	time_state->guest_host_offset = 0;
+    } else {
+	uint64_t guest_time, guest_elapsed, desired_elapsed;
+	uint64_t host_time, target_host_time;
 
-    v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
-}
+	guest_time = v3_get_guest_time(time_state);
 
+	/* Compute what host time this guest time should correspond to. */
+	guest_elapsed = (guest_time - time_state->initial_time);
+	desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq;
+	target_host_time = time_state->initial_time + desired_elapsed;
 
-int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data) {
+	/* Yield until that host time is reached */
+	host_time = v3_get_host_time(time_state);
+	while (host_time < target_host_time) {
+	    v3_yield(info);
+	    host_time = v3_get_host_time(time_state);
+	}
+
+	time_state->guest_host_offset = (sint64_t)guest_time - (sint64_t)host_time;
+    }
+    return 0;
+}
+
+int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, 
+	     void * private_data) {
     struct vm_timer * timer = NULL;
     timer = (struct vm_timer *)V3_Malloc(sizeof(struct vm_timer));
     V3_ASSERT(timer != NULL);
@@ -63,7 +137,6 @@ int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * pri
     return 0;
 }
 
-
 int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) {
     list_del(&(timer->timer_link));
     info->time_state.num_timers--;
@@ -72,34 +145,182 @@ int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) {
     return 0;
 }
 
+void v3_update_timers(struct guest_info * info) {
+    struct vm_timer * tmp_timer;
+    uint64_t old_time = info->time_state.last_update;
+    uint64_t cycles;
 
+    info->time_state.last_update = v3_get_guest_time(&info->time_state);
+    cycles = info->time_state.last_update - old_time;
 
-void v3_update_time(struct guest_info * info, uint64_t cycles) {
-    struct vm_timer * tmp_timer;
+    list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
+	tmp_timer->ops->update_timer(info, cycles, info->time_state.guest_cpu_freq, tmp_timer->private_data);
+    }
+}
+
+/* 
+ * Handle full virtualization of the time stamp counter.  As noted
+ * above, we don't store the actual value of the TSC, only the guest
+ * TSC's offset from monotonic guest time. If the guest writes to the
+ * TSC, we handle this by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */ 
+
+int v3_rdtsc(struct guest_info * info) {
+    uint64_t tscval = v3_get_guest_tsc(&info->time_state); // guest-visible TSC
+    info->vm_regs.rdx = tscval >> 32;              // upper 32 bits go to RDX
+    info->vm_regs.rax = tscval & 0xffffffffLL;     // lower 32 bits go to RAX
+    return 0;                                      // always reports success
+}
+
+int v3_handle_rdtsc(struct guest_info * info) {
+    v3_rdtsc(info);
     
-    //   cycles *= 8;
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
 
-//    cycles /= 150;
+    info->rip += 2;
+    
+    return 0;
+}
 
-    info->time_state.guest_tsc += cycles;
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR; 
+    ret = v3_handle_msr_read(info);
+    if (ret) return ret;
+    info->vm_regs.rcx = info->vm_regs.rax;
 
-    list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
-	tmp_timer->ops->update_time(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data);
-    }
-  
+    /* Now do the TSC half of the instruction */
+    ret = v3_rdtsc(info);
+    if (ret) return ret;
+    
+    return 0;
+}
+
+
+/* RDTSCP exit: emulate the instruction, then step RIP past its 3 bytes. */
+int v3_handle_rdtscp(struct guest_info * info) {
+    /* v3_rdtscp() can fail in its MSR read; report that instead of resuming. */
+    if (v3_rdtscp(info) != 0) {
+	return -1;
+    }
+    info->vm_regs.rax &= 0x00000000ffffffffLL; /* keep only the low 32 bits */
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+    info->rip += 3;
+    return 0;
+}
 
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, 
+				 struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
 
-    //info->time_state.pending_cycles = 0;
+    return 0;
+}
+
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, 
+			      struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    /* Guest write to TSC_AUX: just remember the value in this core's time state. */
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+    time_state->tsc_aux.lo = msr_val.lo;
+    time_state->tsc_aux.hi = msr_val.hi;
+
+    return 0;
+}
+
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+			     struct v3_msr *msr_val, void *priv) {
+    uint64_t time = v3_get_guest_tsc(&info->time_state);
+    /* Guest read of the TSC MSR: hand back the guest-visible TSC as hi/lo halves. */
+    V3_ASSERT(msr_num == TSC_MSR);
+    msr_val->hi = time >> 32;
+    msr_val->lo = time & 0xffffffffLL;
+    
+    return 0;
+}
+
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+			     struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+    V3_ASSERT(msr_num == TSC_MSR);
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; 
+    /* The TSC itself is never stored; a write only re-bases tsc_guest_offset. */
+    return 0;
 }
 
-void v3_advance_time(struct guest_info * core) {
-    struct vm_timer * tmp_timer;
 
+static int init_vm_time(struct v3_vm_info *vm_info) {
+    int ret;
 
-    list_for_each_entry(tmp_timer, &(core->time_state.timers), timer_link) {
-	tmp_timer->ops->advance_timer(core, tmp_timer->private_data);
+    PrintDebug("Installing TSC MSR hook.\n");
+    ret = v3_hook_msr(vm_info, TSC_MSR, 
+		      tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+
+    PrintDebug("Installing TSC_AUX MSR hook.\n");
+    if (ret) return ret;
+    ret = v3_hook_msr(vm_info, TSC_AUX_MSR, tsc_aux_msr_read_hook, 
+		      tsc_aux_msr_write_hook, NULL);
+    if (ret) return ret;
+
+    PrintDebug("Registering TIME_CPUFREQ hypercall.\n");
+    ret = v3_register_hypercall(vm_info, TIME_CPUFREQ_HCALL, 
+				handle_cpufreq_hcall, NULL);
+    return ret;
+}
+
+void v3_init_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    v3_cfg_tree_t * cfg_tree = info->core_cfg_data;
+    static int one_time = 0;
+    char *khz;
+
+    time_state->host_cpu_freq = V3_CPU_KHZ();
+    khz = v3_cfg_val(cfg_tree, "khz");
+    if (khz) {
+	time_state->guest_cpu_freq = atoi(khz);
+	PrintDebug("Core %d CPU frequency requested at %d khz.\n", 
+		   info->cpu_id, time_state->guest_cpu_freq);
+    }
+    
+    if (!khz || time_state->guest_cpu_freq > time_state->host_cpu_freq) {
+	time_state->guest_cpu_freq = time_state->host_cpu_freq;
     }
-  
+    PrintDebug("Core %d CPU frequency set to %d KHz (host CPU frequency = %d KHz).\n", info->cpu_id, time_state->guest_cpu_freq, time_state->host_cpu_freq);
 
+    time_state->initial_time = 0;
+    time_state->last_update = 0;
+    time_state->guest_host_offset = 0;
+    time_state->tsc_guest_offset = 0;
 
+    INIT_LIST_HEAD(&(time_state->timers));
+    time_state->num_timers = 0;
+    
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+    if (!one_time) {
+	init_vm_time(info->vm_info);
+	one_time = 1;
+    }
 }
+
+
+
+
+
+
+
+
+
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c
index 18d183b..7cea8a1 100644
--- a/palacios/src/palacios/vmx.c
+++ b/palacios/src/palacios/vmx.c
@@ -225,6 +225,10 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
     vmx_state->pri_proc_ctrls.pause_exit = 1;
+    vmx_state->pri_proc_ctrls.tsc_offset = 1;
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
+#endif
 
     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
@@ -636,13 +640,12 @@ static void print_exit_log(struct guest_info * info) {
  */
 int v3_vmx_enter(struct guest_info * info) {
     int ret = 0;
-    uint64_t tmp_tsc = 0;
+    uint32_t tsc_offset_low, tsc_offset_high;
     struct vmx_exit_info exit_info;
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-
     // v3_print_guest_state(info);
 
     // disable global interrupts for vm state transition
@@ -665,10 +668,16 @@ int v3_vmx_enter(struct guest_info * info) {
 	vmcs_write(VMCS_GUEST_CR3, guest_cr3);
     }
 
-    // We do timer injection here to track real host time.
-    rdtscll(tmp_tsc);
-    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-    rdtscll(info->time_state.cached_host_tsc);
+    v3_update_timers(info);
+
+    /* If this guest is frequency-lagged behind host time, wait 
+     * for the appropriate host time before resuming the guest. */
+    v3_adjust_time(info);
+
+    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
+    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
+    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
     if (info->vm_info->run_state == VM_STOPPED) {
 	info->vm_info->run_state = VM_RUNNING;
@@ -688,12 +697,8 @@ int v3_vmx_enter(struct guest_info * info) {
 	return -1;
     }
 
-    //   rdtscll(tmp_tsc);
-    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
     info->num_exits++;
 
-
     /* Update guest state */
     v3_vmx_save_vmcs(info);
 
@@ -741,11 +746,38 @@ int v3_vmx_enter(struct guest_info * info) {
 
 int v3_start_vmx_guest(struct guest_info* info) {
 
+    PrintDebug("Starting VMX core %u\n",info->cpu_id);
+    if (info->cpu_mode==INIT) {
+        PrintDebug("VMX core %u: I am an AP in INIT mode, waiting for that to change\n",info->cpu_id);
+        while (info->cpu_mode==INIT) {
+            v3_yield(info);
+            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
+        }
+        PrintDebug("VMX core %u: I am out of INIT\n",info->cpu_id);
+        if (info->cpu_mode==SIPI) {
+            PrintDebug("VMX core %u: I am waiting on a SIPI to set my starting address\n",info->cpu_id);
+            while (info->cpu_mode==SIPI) {
+                v3_yield(info);
+                //PrintDebug("VMX core %u: still waiting for SIPI\n",info->cpu_id);
+            }
+        }
+        PrintDebug("VMX core %u: I have my SIPI\n", info->cpu_id);
+    }
+
+    if (info->cpu_mode!=REAL) {
+        PrintError("VMX core %u: I am not in REAL mode at launch!  Huh?!\n", info->cpu_id);
+        return -1;
+    }
+
+    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
+               info->cpu_id, info->segments.cs.selector, (void*)(info->segments.cs.base),
+               info->segments.cs.limit,(void*)(info->rip));
+
 
-    PrintDebug("Launching VMX guest\n");
 
-    rdtscll(info->time_state.cached_host_tsc);
+    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
 
+    v3_start_time(info);
 
     while (1) {
 	if (v3_vmx_enter(info) == -1) {
diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c
index d88210d..357f0d2 100644
--- a/palacios/src/palacios/vmx_handler.c
+++ b/palacios/src/palacios/vmx_handler.c
@@ -99,6 +99,18 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf
             }
 
             break;
+
+        case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+	    PrintDebug("RDTSC\n");
+#endif 
+	    if (v3_handle_rdtsc(info) == -1) {
+		PrintError("Error Handling RDTSC instruction\n");
+		return -1;
+	    }
+	    
+	    break;
+
         case VMEXIT_CPUID:
 	    if (v3_handle_cpuid(info) == -1) {
 		PrintError("Error Handling CPUID instruction\n");