Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Still working on timer updates, heading towards being able to have a CPU
Patrick G. Bridges [Mon, 18 Oct 2010 22:26:50 +0000 (16:26 -0600)]
running at a different frequency than a host CPU.

Kconfig
palacios/include/palacios/vmm_time.h
palacios/src/palacios/svm.c
palacios/src/palacios/svm_handler.c
palacios/src/palacios/vmm_time.c
palacios/src/palacios/vmx.c
palacios/src/palacios/vmx_handler.c

diff --git a/Kconfig b/Kconfig
index 9092a67..75c7658 100644 (file)
--- a/Kconfig
+++ b/Kconfig
@@ -218,33 +218,13 @@ endmenu
 
 menu "Time Management"
 
-
 config TIME_VIRTUALIZE_TSC
        bool "Virtualize guest TSC"
        default n
        help
            Virtualize the processor time stamp counter in the guest, 
            generally increasing consistency between various time sources 
-           but also potentially making guest time run slightly slower
-           than real time.
-
-config TIME_VIRTUAL_TSC_CONTROL
-       bool "Adjust virtual TSC towards real time when possible"
-       default y
-       depends on TIME_VIRTUALIZE_TSC
-       help 
-           Enables control of TSC virtualization so that the TSC
-           attempts to catch up with real time when possible
-
-config TIME_MASK_OVERHEAD
-       bool "Hide virtualization overhead from guest timing"
-       default n
-       depends on TIME_VIRTUALIZE_TSC
-       help
-           Try to mask the overhead of virtualization from guests
-           by not including it in the time updates. Can dramatically
-           slow virtual time compared to real time if VIRTUAL_TSC_CONTROL
-           is not enabled.
+           but also potentially making guest time run slower than real time.
 
 endmenu
 
index 0a25833..0377be8 100644 (file)
 
 #include <palacios/vmm_types.h>
 #include <palacios/vmm_list.h>
+#include <palacios/vmm_msr.h>
 #include <palacios/vmm_util.h>
 
 struct guest_info;
 
 struct vm_time {
-    uint32_t cpu_freq; // in kHZ
-
-    uint64_t last_update;  // Last time (in guest time) the timers were updated
-    uint64_t pause_time;   // Cache value to help calculate the guest_tsc
-    sint64_t host_offset;  // Offset of guest time from host time.
-    sint64_t offset_sum;   // Sum of past and current host_offsets
-
-    // Installed Timers 
+    uint32_t cpu_freq;         // in kHZ in terms of guest CPU speed
+                               // which ideally can be different from (lower
+                               // than) the host CPU speed!
+         
+    uint32_t time_mult;        // Fields for computing monotonic guest time
+    uint32_t time_div;         // from host (tsc) time
+    sint64_t time_offset;      
+
+    sint64_t tsc_time_offset;  // Offset for computing guest TSC value from
+                               // monotonic guest time
+    
+    uint64_t last_update;      // Last time (in monotonic guest time) the 
+                               // timers were updated
+
+    uint64_t pause_time;       // Cache value to help calculate the guest_tsc
+    
+    struct v3_msr tsc_aux;     // Auxiliary MSR for RDTSCP
+
+    // Installed Timers slaved off of the guest monotonic TSC
     uint_t num_timers;
     struct list_head timers;
 };
@@ -69,16 +81,30 @@ int v3_start_time(struct guest_info * info);
 int v3_pause_time(struct guest_info * info);
 int v3_resume_time(struct guest_info * info);
 
+// Returns host time
 static inline uint64_t v3_get_host_time(struct vm_time *t) {
     uint64_t tmp;
     rdtscll(tmp);
     return tmp;
 }
 
+// Returns *monotonic* guest time.
 static inline uint64_t v3_get_guest_time(struct vm_time *t) {
     if (t->pause_time) return t->pause_time;
-    else return v3_get_host_time(t) + t->host_offset;
+    else return v3_get_host_time(t) + t->time_offset;
+}
+
+// Returns the TSC value seen by the guest
+static inline uint64_t v3_get_guest_tsc(struct vm_time *t) {
+    return v3_get_guest_time(t) + t->tsc_time_offset;
 }
+
+#define TSC_MSR     0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
+
 #endif // !__V3VEE__
 
 #endif
index dc2b660..12c277f 100644 (file)
@@ -472,9 +472,8 @@ int v3_svm_enter(struct guest_info * info) {
     v3_update_timers(info);
     v3_resume_time(info);
 
-#ifdef CONFIG_TIME_TSC_OFFSET
-    guest_ctrl->TSC_OFFSET = info->time_state.host_offset;
-#endif
+    guest_ctrl->TSC_OFFSET = info->time_state.time_offset 
+       + info->time_state.tsc_time_offset;
 
     //V3_Print("Calling v3_svm_launch\n");
 
index d8b47c5..171d2b5 100644 (file)
@@ -246,9 +246,24 @@ int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_i
                return -1;
            }
            break;
-       
-
-
+        case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSC/RDTSCP\n");
+#endif 
+           if (v3_handle_rdtsc(info) == -1) {
+               PrintError("Error Handling RDTSC instruction\n");
+               return -1;
+           }
+           break;
+        case VMEXIT_RDTSCP:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSCP\n");
+#endif 
+           if (v3_handle_rdtscp(info) == -1) {
+               PrintError("Error Handling RDTSCP instruction\n");
+               return -1;
+           }
+           break;
 
 
            /* Exits Following this line are NOT HANDLED */
index e7d5cfe..dc49b1b 100644 (file)
 #define PrintDebug(fmt, args...)
 #endif
 
+/* Overview 
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest 
+ * resolution, lowest overhead timer on modern CPUs that it can - the 
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not 
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a 
+ * constant rate TSC, and Palacios relies on this fact.
+ * 
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ *     time in the guest. This is computed as a multiplier/offset from (1) above
+ * (3) The actual guest timestamp counter (which can be set by
+ *     writing to the guest TSC MSR - MSR 0x10). This is computed as an
+ *     offset from the monotonic guest TSC in (2) above, and this
+ *     offset is updated when the TSC MSR is written.
+ *
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest, 
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ *  
+ *
+ */
+
+
 static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) {
     struct vm_time * time_state = &(info->time_state);
 
@@ -39,22 +68,6 @@ static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void
 
 
 
-void v3_init_time(struct guest_info * info) {
-    struct vm_time * time_state = &(info->time_state);
-
-    time_state->cpu_freq = V3_CPU_KHZ();
-    time_state->pause_time = 0;
-    time_state->last_update = 0;
-    time_state->host_offset = 0;
-    time_state->offset_sum = 0;
-
-    INIT_LIST_HEAD(&(time_state->timers));
-    time_state->num_timers = 0;
-
-    v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
-}
-
 int v3_start_time(struct guest_info * info) {
     /* We start running with guest_time == host_time */
     uint64_t t = v3_get_host_time(&info->time_state); 
@@ -74,47 +87,14 @@ int v3_pause_time(struct guest_info * info) {
     return 0;
 }
 
-/* Use a control-theoretic approach, specifically a PI control approach,
- * to adjust host_offset towards 0. Overall control documentation in 
- * palacios/docs/time_control.tex Control parameters are P and I, 
- * broken into rational numbers
- */
-
-/* These numbers need to be actually determined by pole placement work. They're 
- * just blind guesses for now, which is a really bad idea. :) */
-#define P_NUM 1
-#define P_DENOM 2
-#define I_NUM 1
-#define I_DENOM 20
-
-void adjust_time_offset(struct guest_info * info) {
-    /* Set point for control: Desired offset = 0; 
-     * Error = host_offset - 0 = host_offset */
-
-    sint64_t adjust;
-
-    /* Update the integral of the errror */
-    info->time_state.offset_sum += info->time_state.host_offset;
-    adjust = (P_NUM * info->time_state.host_offset) / P_DENOM +
-       (I_NUM * info->time_state.offset_sum) / I_DENOM;
-
-    /* We may want to constrain *adjust* because of
-     * resolution/accuracy constraints. Explore that later. */
-    info->time_state.host_offset -= adjust;
-    return;
-}
-
 int v3_resume_time(struct guest_info * info) {
     uint64_t t = v3_get_host_time(&info->time_state);
     V3_ASSERT(info->time_state.pause_time != 0);
-    info->time_state.host_offset = 
+    info->time_state.time_offset = 
        (sint64_t)info->time_state.pause_time - (sint64_t)t;
-#ifdef CONFIG_TIME_TSC_OFFSET_ADJUST
-    adjust_time_offset(info);
-#endif
     info->time_state.pause_time = 0;
     PrintDebug("Time resumed paused at guest time as %llu "
-              "offset %lld from host time.\n", t, info->time_state.host_offset);
+              "offset %lld from host time.\n", t, info->time_state.time_offset);
 
     return 0;
 }
@@ -155,3 +135,133 @@ void v3_update_timers(struct guest_info * info) {
        tmp_timer->ops->update_timer(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data);
     }
 }
+
+
+/* 
+ * Handle full virtualization of the time stamp counter.  As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from the host TSC. If the guest writes to the TSC, we handle
+ * this by changing that offset.
+ */ 
+
+int v3_rdtsc(struct guest_info * info) {
+    uint64_t tscval = v3_get_guest_tsc(&info->time_state);
+    info->vm_regs.rdx = tscval >> 32;
+    info->vm_regs.rax = tscval & 0xffffffffLL;
+    return 0;
+}
+
+int v3_handle_rdtsc(struct guest_info * info) {
+    v3_rdtsc(info);
+    
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 2;
+    
+    return 0;
+}
+
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR; 
+    ret = v3_handle_msr_read(info);
+    if (ret) return ret;
+    info->vm_regs.rcx = info->vm_regs.rax;
+
+    /* Now do the TSC half of the instruction, which may hit the normal 
+     * TSC hook if it exists */
+    ret = v3_rdtsc(info);
+    if (ret) return ret;
+    
+    return 0;
+}
+
+
+int v3_handle_rdtscp(struct guest_info * info) {
+
+    v3_rdtscp(info);
+    
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 3;
+    
+    return 0;
+}
+
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, 
+                                struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
+
+    return 0;
+}
+
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, 
+                             struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+    time_state->tsc_aux.lo = msr_val.lo;
+    time_state->tsc_aux.hi = msr_val.hi;
+
+    return 0;
+}
+
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr *msr_val, void *priv) {
+    uint64_t time = v3_get_guest_tsc(&info->time_state);
+
+    V3_ASSERT(msr_num == TSC_MSR);
+    msr_val->hi = time >> 32;
+    msr_val->lo = time & 0xffffffffLL;
+    
+    return 0;
+}
+
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+    V3_ASSERT(msr_num == TSC_MSR);
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_time_offset = (sint64_t)new_tsc - (sint64_t)guest_time; 
+
+    return 0;
+}
+
+
+void v3_init_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+
+    time_state->cpu_freq = V3_CPU_KHZ();
+    time_state->pause_time = 0;
+    time_state->last_update = 0;
+    time_state->time_offset = 0;
+    time_state->time_div = 1;
+    time_state->time_mult = 1;
+    time_state->tsc_time_offset = 0;
+
+    INIT_LIST_HEAD(&(time_state->timers));
+    time_state->num_timers = 0;
+    
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+    /* does init_time get called once, or once *per core*??? */
+    v3_hook_msr(info->vm_info, TSC_MSR, 
+               tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+    v3_hook_msr(info->vm_info, TSC_AUX_MSR, tsc_aux_msr_read_hook, 
+               tsc_aux_msr_write_hook, NULL);
+
+    v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
+}
index 16fc5cd..ad3d321 100644 (file)
@@ -225,8 +225,9 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
     vmx_state->pri_proc_ctrls.pause_exit = 1;
-#ifdef CONFIG_TIME_TSC_OFFSET
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
     vmx_state->pri_proc_ctrls.tsc_offset = 1;
+    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
 #endif
 
     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
@@ -670,13 +671,14 @@ int v3_vmx_enter(struct guest_info * info) {
     v3_update_timers(info);
     v3_resume_time(info);
 
-    tsc_offset_high = 
-       (uint32_t)((info->time_state.host_offset >> 32) & 0xffffffff);
-    tsc_offset_low = (uint32_t)(info->time_state.host_offset & 0xffffffff);
-#ifdef CONFIG_TIME_TSC_OFFSET
-    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
-    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
-#endif
+    {
+       sint64_t total_tsc_offset = info->time_state.time_offset + info->time_state.tsc_time_offset;
+
+       tsc_offset_high = (uint32_t)((total_tsc_offset >> 32) & 0xffffffff);
+       tsc_offset_low = (uint32_t)(total_tsc_offset & 0xffffffff);
+       check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+       check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
+    }
 
     PrintDebug("Stored 0x %x %x into vmcs TSC offset.\n", 
               tsc_offset_high, tsc_offset_low);
index 9d29f9f..357f0d2 100644 (file)
@@ -100,15 +100,16 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf
 
             break;
 
-#if 0
         case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSC\n");
+#endif 
            if (v3_handle_rdtsc(info) == -1) {
                PrintError("Error Handling RDTSC instruction\n");
                return -1;
            }
            
            break;
-#endif
 
         case VMEXIT_CPUID:
            if (v3_handle_cpuid(info) == -1) {