From: Jack Lange Date: Mon, 25 Oct 2010 22:38:09 +0000 (-0500) Subject: Merge branch 'devel' of ssh://palacios@newskysaw/home/palacios/palacios into devel X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=964e7836a6227341b5d895a830b7e36ad6debffb;hp=4731ff7dc97e42853546b38b4d441d793e7a4ec8;p=palacios-OLD.git Merge branch 'devel' of ssh://palacios@newskysaw/home/palacios/palacios into devel --- diff --git a/Kconfig b/Kconfig index 9d3cbb9..0910a5f 100644 --- a/Kconfig +++ b/Kconfig @@ -188,7 +188,6 @@ endmenu source "Kconfig.stdlibs" - menu "Virtual Paging" config SHADOW_PAGING @@ -224,6 +223,18 @@ config SHADOW_PAGING_CACHE1 endmenu +menu "Time Management" + +config TIME_VIRTUALIZE_TSC + bool "Virtualize guest TSC" + default n + help + Virtualize the processor time stamp counter in the guest, + generally increasing consistency between various time sources + but also potentially making guest time run slower than real time. + +endmenu + menu "Symbiotic Functions" config SYMBIOTIC @@ -331,7 +342,12 @@ config DEBUG_INTERRUPTS help This turns on debugging for the interrupt system - +config DEBUG_TIME + bool "Timing" + default n + depends on DEBUG_ON + help + This turns on debugging of system time virtualization config DEBUG_IO bool "IO" diff --git a/Makefile b/Makefile index bb5c155..9d77f82 100644 --- a/Makefile +++ b/Makefile @@ -818,7 +818,7 @@ endif ALLSOURCE_ARCHS := $(ARCH) define all-sources - ( find $(__srctree)/palacios $(RCS_FIND_IGNORE) \ + ( find $(__srctree)palacios $(RCS_FIND_IGNORE) \ \( -name lib \) -prune -o \ -name '*.[chS]' -print; ) endef diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 7d82660..04a5632 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -114,6 +114,8 @@ struct guest_info { struct v3_sym_core_state sym_core_state; #endif + /* Per-core config tree data. 
*/ + v3_cfg_tree_t * core_cfg_data; struct v3_vm_info * vm_info; diff --git a/palacios/include/palacios/vmm_time.h b/palacios/include/palacios/vmm_time.h index 120838d..74c7584 100644 --- a/palacios/include/palacios/vmm_time.h +++ b/palacios/include/palacios/vmm_time.h @@ -24,31 +24,32 @@ #include #include +#include +#include struct guest_info; struct vm_time { - uint32_t cpu_freq; // in kHZ - - // Total number of guest run time cycles - uint64_t guest_tsc; - - // Cache value to help calculate the guest_tsc - uint64_t cached_host_tsc; - - // The number of cycles pending for notification to the timers - //ullong_t pending_cycles; - - // Installed Timers + uint32_t host_cpu_freq; // in kHZ + uint32_t guest_cpu_freq; // can be lower than host CPU freq! + + sint64_t guest_host_offset;// Offset of monotonic guest time from host time + sint64_t tsc_guest_offset; // Offset of guest TSC from monotonic guest time + + uint64_t last_update; // Last time (in monotonic guest time) the + // timers were updated + + uint64_t initial_time; // Time when VMM started. 
+ + struct v3_msr tsc_aux; // Auxiliary MSR for RDTSCP + + // Installed Timers slaved off of the guest monotonic TSC uint_t num_timers; struct list_head timers; }; - - - struct vm_timer_ops { - void (*update_time)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data); + void (*update_timer)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data); void (*advance_timer)(struct guest_info * info, void * private_data); }; @@ -59,18 +60,48 @@ struct vm_timer { struct list_head timer_link; }; +// Basic functions for handling passage of time in palacios +void v3_init_time(struct guest_info * info); +int v3_start_time(struct guest_info * info); +int v3_adjust_time(struct guest_info * info); - - +// Basic functions for attaching timers to the passage of time int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data); int v3_remove_timer(struct guest_info * info, struct vm_timer * timer); +void v3_update_timers(struct guest_info * info); -void v3_advance_time(struct guest_info * info); +// Functions to return the different notions of time in Palacios. +static inline uint64_t v3_get_host_time(struct vm_time *t) { + uint64_t tmp; + rdtscll(tmp); + return tmp; +} + +// Returns *monotonic* guest time. +static inline uint64_t v3_get_guest_time(struct vm_time *t) { + return v3_get_host_time(t) + t->guest_host_offset; +} + +// Returns the TSC value seen by the guest +static inline uint64_t v3_get_guest_tsc(struct vm_time *t) { + return v3_get_guest_time(t) + t->tsc_guest_offset; +} + +// Returns offset of guest TSC from host TSC +static inline sint64_t v3_tsc_host_offset(struct vm_time *time_state) { + return time_state->guest_host_offset + time_state->tsc_guest_offset; +} + +// Functions for handling exits on the TSC when fully virtualizing +// the timestamp counter. 
+#define TSC_MSR 0x10 +#define TSC_AUX_MSR 0xC0000103 + +int v3_handle_rdtscp(struct guest_info *info); +int v3_handle_rdtsc(struct guest_info *info); -void v3_update_time(struct guest_info * info, ullong_t cycles); -void v3_init_time(struct guest_info * info); #endif // !__V3VEE__ diff --git a/palacios/src/devices/8254.c b/palacios/src/devices/8254.c index 51c3f04..ecb2fa0 100644 --- a/palacios/src/devices/8254.c +++ b/palacios/src/devices/8254.c @@ -236,7 +236,7 @@ static int handle_crystal_tics(struct vm_device * dev, struct channel * ch, uint #include -static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) { +static void pit_update_timer(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) { struct vm_device * dev = (struct vm_device *)private_data; struct pit * state = (struct pit *)dev->private_data; // ullong_t tmp_ctr = state->pit_counter; @@ -313,14 +313,6 @@ static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullon return; } - -static void pit_advance_time(struct guest_info * core, void * private_data) { - - v3_raise_irq(core->vm_info, 0); -} - - - /* This should call out to handle_SQR_WAVE_write, etc... 
*/ static int handle_channel_write(struct channel * ch, char val) { @@ -624,8 +616,7 @@ static int pit_write_command(struct guest_info * core, ushort_t port, void * src static struct vm_timer_ops timer_ops = { - .update_time = pit_update_time, - .advance_timer = pit_advance_time, + .update_timer = pit_update_timer, }; diff --git a/palacios/src/devices/apic.c b/palacios/src/devices/apic.c index a844c6c..40b90a1 100644 --- a/palacios/src/devices/apic.c +++ b/palacios/src/devices/apic.c @@ -1100,7 +1100,7 @@ static struct intr_ctrl_ops intr_ops = { static struct vm_timer_ops timer_ops = { - .update_time = apic_update_time, + .update_timer = apic_update_time, }; diff --git a/palacios/src/devices/icc_bus.c b/palacios/src/devices/icc_bus.c index 79f9159..e9fe95c 100644 --- a/palacios/src/devices/icc_bus.c +++ b/palacios/src/devices/icc_bus.c @@ -135,7 +135,7 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm // Sanity check if (core->cpu_mode != INIT) { - PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id); + PrintError("icc_bus: Warning: core %u is not in INIT state (mode = %d), ignored\n",core->cpu_id, core->cpu_mode); // Only a warning, since INIT INIT SIPI is common break; } @@ -159,7 +159,7 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm // Sanity check if (core->cpu_mode!=SIPI) { - PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id); + PrintError("icc_bus: core %u is not in SIPI state (mode = %d), ignored!\n",core->cpu_id, core->cpu_mode); break; } @@ -204,8 +204,6 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, uint32_t extirq) { - PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq); - struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data; struct icc_bus_state 
* state = (struct icc_bus_state *)icc_bus->private_data; diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index 7b33c8c..35f2047 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -80,8 +80,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { // - - ctrl_area->svm_instrs.VMRUN = 1; ctrl_area->svm_instrs.VMMCALL = 1; ctrl_area->svm_instrs.VMLOAD = 1; @@ -99,6 +97,12 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { ctrl_area->instrs.CPUID = 1; ctrl_area->instrs.HLT = 1; + +#ifdef CONFIG_TIME_VIRTUALIZE_TSC + ctrl_area->instrs.rdtsc = 1; + ctrl_area->svm_instrs.rdtscp = 1; +#endif + // guest_state->cr0 = 0x00000001; // PE /* @@ -422,7 +426,6 @@ static int update_irq_entry_state(struct guest_info * info) { int v3_svm_enter(struct guest_info * info) { vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data)); vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); - ullong_t tmp_tsc; addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0; // Conditionally yield the CPU if the timeslice has expired @@ -475,32 +478,26 @@ int v3_svm_enter(struct guest_info * info) { } #endif + v3_update_timers(info); - rdtscll(tmp_tsc); - v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc)); - rdtscll(info->time_state.cached_host_tsc); - // guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc; + /* If this guest is frequency-lagged behind host time, wait + * for the appropriate host time before resuming the guest. 
*/ + v3_adjust_time(info); - //V3_Print("Calling v3_svm_launch\n"); + guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state); + //V3_Print("Calling v3_svm_launch\n"); v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]); - - //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip); + //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip); v3_last_exit = (uint32_t)(guest_ctrl->exit_code); - //rdtscll(tmp_tsc); - // v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc); - //PrintDebug("SVM Returned\n"); info->num_exits++; - - - // Save Guest state from VMCB info->rip = guest_state->rip; info->vm_regs.rsp = guest_state->rsp; @@ -596,8 +593,7 @@ int v3_start_svm_guest(struct guest_info *info) { //PrintDebugVMCB((vmcb_t*)(info->vmm_data)); info->vm_info->run_state = VM_RUNNING; - rdtscll(info->yield_start_cycle); - + v3_start_time(info); while (1) { if (v3_svm_enter(info) == -1) { diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c index d8b47c5..171d2b5 100644 --- a/palacios/src/palacios/svm_handler.c +++ b/palacios/src/palacios/svm_handler.c @@ -246,9 +246,24 @@ int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_i return -1; } break; - - - + case VMEXIT_RDTSC: +#ifdef CONFIG_DEBUG_TIME + PrintDebug("RDTSC/RDTSCP\n"); +#endif + if (v3_handle_rdtsc(info) == -1) { + PrintError("Error Handling RDTSC instruction\n"); + return -1; + } + break; + case VMEXIT_RDTSCP: +#ifdef CONFIG_DEBUG_TIME + PrintDebug("RDTSCP\n"); +#endif + if (v3_handle_rdtscp(info) == -1) { + PrintError("Error Handling RDTSCP instruction\n"); + return -1; + } + break; /* Exits Following this line are NOT HANDLED */ diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c index 4dddf6e..fe22f0b 100644 --- 
a/palacios/src/palacios/vm_guest.c +++ b/palacios/src/palacios/vm_guest.c @@ -365,23 +365,25 @@ void v3_print_GPRs(struct guest_info * info) { #include static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_data) { v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id()); - + int cpu_valid = 0; + v3_print_guest_state(core); - // init SVM/VMX #ifdef CONFIG_SVM if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) { + cpu_valid = 1; PrintDebugVMCB((vmcb_t *)(core->vmm_data)); } #endif #ifdef CONFIG_VMX - else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { + if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { + cpu_valid = 1; v3_print_vmcs(); } #endif - else { - PrintError("Invalid CPU Type\n"); + if (!cpu_valid) { + PrintError("Invalid CPU Type 0x%x\n", cpu_type); return -1; } @@ -405,7 +407,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat int v3_init_vm(struct v3_vm_info * vm) { v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id()); - + int cpu_valid = 0; if (v3_get_foreground_vm() == NULL) { v3_set_foreground_vm(vm); @@ -449,24 +451,23 @@ int v3_init_vm(struct v3_vm_info * vm) { if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) { v3_init_svm_io_map(vm); v3_init_svm_msr_map(vm); - } + cpu_valid = 1; + } #endif #ifdef CONFIG_VMX - else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { + if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { v3_init_vmx_io_map(vm); v3_init_vmx_msr_map(vm); + cpu_valid = 1; } #endif - else { - PrintError("Invalid CPU Type\n"); + if (!cpu_valid) { + PrintError("Invalid CPU Type 0x%x\n", cpu_type); return -1; } - - v3_register_hypercall(vm, GUEST_INFO_HCALL, info_hcall, NULL); - V3_Print("GUEST_INFO_HCALL=%x\n", GUEST_INFO_HCALL); return 0; @@ -474,6 +475,7 @@ int v3_init_vm(struct v3_vm_info * vm) { int v3_init_core(struct guest_info * core) { v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id()); 
+ int cpu_valid = 0; struct v3_vm_info * vm = core->vm_info; /* @@ -505,18 +507,20 @@ int v3_init_core(struct guest_info * core) { PrintError("Error in SVM initialization\n"); return -1; } + cpu_valid = 1; } #endif #ifdef CONFIG_VMX - else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { + if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) { if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) { PrintError("Error in VMX initialization\n"); return -1; } + cpu_valid = 1; } #endif - else { - PrintError("Invalid CPU Type\n"); + if (!cpu_valid) { + PrintError("Invalid CPU Type 0x%x\n", cpu_type); return -1; } diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index 49f30ba..e129d29 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -292,7 +292,7 @@ v3_cpu_mode_t v3_get_host_cpu_mode() { void v3_yield_cond(struct guest_info * info) { uint64_t cur_cycle; - rdtscll(cur_cycle); + cur_cycle = v3_get_host_time(&info->time_state); if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) { @@ -301,7 +301,7 @@ void v3_yield_cond(struct guest_info * info) { (void *)cur_cycle, (void *)info->yield_start_cycle, (void *)info->yield_cycle_period); */ V3_Yield(); - rdtscll(info->yield_start_cycle); + info->yield_start_cycle = v3_get_host_time(&info->time_state); } } @@ -315,7 +315,7 @@ void v3_yield(struct guest_info * info) { V3_Yield(); if (info) { - rdtscll(info->yield_start_cycle); + info->yield_start_cycle = v3_get_host_time(&info->time_state); } } diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index 738d2ee..ebad8b7 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -497,6 +497,7 @@ struct v3_vm_info * v3_config_guest(void * cfg_blob) { info->cpu_id = i; info->vm_info = vm; + info->core_cfg_data = per_core_cfg; if (pre_config_core(info, per_core_cfg) == -1) { PrintError("Error in core %d preconfiguration\n", i); 
diff --git a/palacios/src/palacios/vmm_halt.c b/palacios/src/palacios/vmm_halt.c index 5015046..7970a40 100644 --- a/palacios/src/palacios/vmm_halt.c +++ b/palacios/src/palacios/vmm_halt.c @@ -38,17 +38,12 @@ int v3_handle_halt(struct guest_info * info) { if (info->cpl != 0) { v3_raise_exception(info, GPF_EXCEPTION); } else { - uint64_t yield_start = 0; - PrintDebug("CPU Yield\n"); while (!v3_intr_pending(info)) { - rdtscll(yield_start); + /* Since we're in an exit, time is already paused here, so no need to pause again. */ v3_yield(info); - - v3_update_time(info, yield_start - info->time_state.cached_host_tsc); - - rdtscll(info->time_state.cached_host_tsc); + v3_update_timers(info); /* At this point, we either have some combination of interrupts, including perhaps a timer interrupt, or diff --git a/palacios/src/palacios/vmm_msr.c b/palacios/src/palacios/vmm_msr.c index 96d3ddc..66a14d4 100644 --- a/palacios/src/palacios/vmm_msr.c +++ b/palacios/src/palacios/vmm_msr.c @@ -26,6 +26,8 @@ void v3_init_msr_map(struct v3_vm_info * vm) { struct v3_msr_map * msr_map = &(vm->msr_map); + PrintDebug("Initializing MSR map.\n"); + INIT_LIST_HEAD(&(msr_map->hook_list)); msr_map->num_hooks = 0; diff --git a/palacios/src/palacios/vmm_time.c b/palacios/src/palacios/vmm_time.c index b169669..44affe4 100644 --- a/palacios/src/palacios/vmm_time.c +++ b/palacios/src/palacios/vmm_time.c @@ -12,6 +12,7 @@ * All rights reserved. * * Author: Jack Lange + * Patrick G. Bridges * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". @@ -21,10 +22,57 @@ #include #include +#ifndef CONFIG_DEBUG_TIME +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + +/* Overview + * + * Time handling in VMMs is challenging, and Palacios uses the highest + * resolution, lowest overhead timer on modern CPUs that it can - the + * processor timestamp counter (TSC). 
Note that on somewhat old processors + * this can be problematic; in particular, older AMD processors did not + * have a constant rate timestamp counter in the face of power management + * events. However, the latest Intel and AMD CPUs all do (should...) have a + * constant rate TSC, and Palacios relies on this fact. + * + * Basically, Palacios keeps track of three quantities as it runs to manage + * the passage of time: + * (1) The host timestamp counter - read directly from HW and never written + * (2) A monotonic guest timestamp counter used to measure the progression of + * time in the guest. This is computed using an offset from (1) above. + * (3) The actual guest timestamp counter (which can be written by + * writing to the guest TSC MSR - MSR 0x10) from the monotonic guest TSC. + * This is also computed as an offset from (2) above, and + * this offset is updated when the TSC MSR is written. + * + * The value used to offset the guest TSC from the host TSC is the *sum* of all + * of these offsets (2 and 3) above. + * + * Because all other devices are slaved off of the passage of time in the guest, + * it is (2) above that drives the firing of other timers in the guest, + * including timer devices such as the Programmable Interval Timer (PIT). + * + * Future additions: + * (1) Add support for temporarily skewing guest time off of where it should + * be to support slack simulation of guests. The idea is that simulators + * set this skew to be the difference between how much time passed for a + * simulated feature and a real implementation of that feature, making + * time pass at a different rate from real time on this core. The VMM will then + * attempt to move this skew back towards 0 subject to resolution/accuracy + * constraints from various system timers. + * + * The main effort in doing this will be to get accuracy/resolution + * information from each local timer and to use this to bound how much skew + * is removed on each exit. 
+ */ + + static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) { struct vm_time * time_state = &(info->time_state); - info->vm_regs.rbx = time_state->cpu_freq; + info->vm_regs.rbx = time_state->guest_cpu_freq; PrintDebug("Guest request cpu frequency: return %ld\n", (long)info->vm_regs.rbx); @@ -33,23 +81,49 @@ static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void -void v3_init_time(struct guest_info * info) { +int v3_start_time(struct guest_info * info) { + /* We start running with guest_time == host_time */ + uint64_t t = v3_get_host_time(&info->time_state); + + PrintDebug("Starting initial guest time as %llu\n", t); + info->time_state.last_update = t; + info->time_state.initial_time = t; + info->yield_start_cycle = t; + return 0; +} + +// If the guest is supposed to run slower than the host, yield out until +// the host time is appropriately far along; +int v3_adjust_time(struct guest_info * info) { struct vm_time * time_state = &(info->time_state); - time_state->cpu_freq = V3_CPU_KHZ(); - - time_state->guest_tsc = 0; - time_state->cached_host_tsc = 0; - // time_state->pending_cycles = 0; - - INIT_LIST_HEAD(&(time_state->timers)); - time_state->num_timers = 0; + if (time_state->host_cpu_freq == time_state->guest_cpu_freq) { + time_state->guest_host_offset = 0; + } else { + uint64_t guest_time, guest_elapsed, desired_elapsed; + uint64_t host_time, target_host_time; - v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL); -} + guest_time = v3_get_guest_time(time_state); + /* Compute what host time this guest time should correspond to. 
*/ + guest_elapsed = (guest_time - time_state->initial_time); + desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq; + target_host_time = time_state->initial_time + desired_elapsed; -int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data) { + /* Yield until that host time is reached */ + host_time = v3_get_host_time(time_state); + while (host_time < target_host_time) { + v3_yield(info); + host_time = v3_get_host_time(time_state); + } + + time_state->guest_host_offset = (sint64_t)guest_time - (sint64_t)host_time; + } + return 0; +} + +int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, + void * private_data) { struct vm_timer * timer = NULL; timer = (struct vm_timer *)V3_Malloc(sizeof(struct vm_timer)); V3_ASSERT(timer != NULL); @@ -63,7 +137,6 @@ int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * pri return 0; } - int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) { list_del(&(timer->timer_link)); info->time_state.num_timers--; @@ -72,34 +145,182 @@ int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) { return 0; } +void v3_update_timers(struct guest_info * info) { + struct vm_timer * tmp_timer; + uint64_t old_time = info->time_state.last_update; + uint64_t cycles; + info->time_state.last_update = v3_get_guest_time(&info->time_state); + cycles = info->time_state.last_update - old_time; -void v3_update_time(struct guest_info * info, uint64_t cycles) { - struct vm_timer * tmp_timer; + list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) { + tmp_timer->ops->update_timer(info, cycles, info->time_state.guest_cpu_freq, tmp_timer->private_data); + } +} + +/* + * Handle full virtualization of the time stamp counter. As noted + * above, we don't store the actual value of the TSC, only the guest's + * offset from monotonic guest's time. 
If the guest writes to the TSC, we + * handle this by changing that offset. + * + * Possible TODO: Proper hooking of TSC read/writes? + */ + +int v3_rdtsc(struct guest_info * info) { + uint64_t tscval = v3_get_guest_tsc(&info->time_state); + info->vm_regs.rdx = tscval >> 32; + info->vm_regs.rax = tscval & 0xffffffffLL; + return 0; +} + +int v3_handle_rdtsc(struct guest_info * info) { + v3_rdtsc(info); - // cycles *= 8; + info->vm_regs.rax &= 0x00000000ffffffffLL; + info->vm_regs.rdx &= 0x00000000ffffffffLL; -// cycles /= 150; + info->rip += 2; + + return 0; +} - info->time_state.guest_tsc += cycles; +int v3_rdtscp(struct guest_info * info) { + int ret; + /* First get the MSR value that we need. It's safe to futz with + * ra/c/dx here since they're modified by this instruction anyway. */ + info->vm_regs.rcx = TSC_AUX_MSR; + ret = v3_handle_msr_read(info); + if (ret) return ret; + info->vm_regs.rcx = info->vm_regs.rax; - list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) { - tmp_timer->ops->update_time(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data); - } - + /* Now do the TSC half of the instruction */ + ret = v3_rdtsc(info); + if (ret) return ret; + + return 0; +} + + +int v3_handle_rdtscp(struct guest_info * info) { + + v3_rdtscp(info); + + info->vm_regs.rax &= 0x00000000ffffffffLL; + info->vm_regs.rcx &= 0x00000000ffffffffLL; + info->vm_regs.rdx &= 0x00000000ffffffffLL; + + info->rip += 3; + + return 0; +} +static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr *msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + + V3_ASSERT(msr_num == TSC_AUX_MSR); + msr_val->lo = time_state->tsc_aux.lo; + msr_val->hi = time_state->tsc_aux.hi; - //info->time_state.pending_cycles = 0; + return 0; +} + +static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + + 
V3_ASSERT(msr_num == TSC_AUX_MSR); + time_state->tsc_aux.lo = msr_val.lo; + time_state->tsc_aux.hi = msr_val.hi; + + return 0; +} + +static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr *msr_val, void *priv) { + uint64_t time = v3_get_guest_tsc(&info->time_state); + + V3_ASSERT(msr_num == TSC_MSR); + msr_val->hi = time >> 32; + msr_val->lo = time & 0xffffffffLL; + + return 0; +} + +static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + uint64_t guest_time, new_tsc; + V3_ASSERT(msr_num == TSC_MSR); + new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo; + guest_time = v3_get_guest_time(time_state); + time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; + + return 0; } -void v3_advance_time(struct guest_info * core) { - struct vm_timer * tmp_timer; +static int init_vm_time(struct v3_vm_info *vm_info) { + int ret; - list_for_each_entry(tmp_timer, &(core->time_state.timers), timer_link) { - tmp_timer->ops->advance_timer(core, tmp_timer->private_data); + PrintDebug("Installing TSC MSR hook.\n"); + ret = v3_hook_msr(vm_info, TSC_MSR, + tsc_msr_read_hook, tsc_msr_write_hook, NULL); + + PrintDebug("Installing TSC_AUX MSR hook.\n"); + if (ret) return ret; + ret = v3_hook_msr(vm_info, TSC_AUX_MSR, tsc_aux_msr_read_hook, + tsc_aux_msr_write_hook, NULL); + if (ret) return ret; + + PrintDebug("Registering TIME_CPUFREQ hypercall.\n"); + ret = v3_register_hypercall(vm_info, TIME_CPUFREQ_HCALL, + handle_cpufreq_hcall, NULL); + return ret; +} + +void v3_init_time(struct guest_info * info) { + struct vm_time * time_state = &(info->time_state); + v3_cfg_tree_t * cfg_tree = info->core_cfg_data; + static int one_time = 0; + char *khz; + + time_state->host_cpu_freq = V3_CPU_KHZ(); + khz = v3_cfg_val(cfg_tree, "khz"); + if (khz) { + time_state->guest_cpu_freq = atoi(khz); + PrintDebug("Core %d CPU frequency requested 
at %d khz.\n", + info->cpu_id, time_state->guest_cpu_freq); + } + + if (!khz || time_state->guest_cpu_freq > time_state->host_cpu_freq) { + time_state->guest_cpu_freq = time_state->host_cpu_freq; } - + PrintDebug("Core %d CPU frequency set to %d KHz (host CPU frequency = %d KHz).\n", info->cpu_id, time_state->guest_cpu_freq, time_state->host_cpu_freq); + time_state->initial_time = 0; + time_state->last_update = 0; + time_state->guest_host_offset = 0; + time_state->tsc_guest_offset = 0; + INIT_LIST_HEAD(&(time_state->timers)); + time_state->num_timers = 0; + + time_state->tsc_aux.lo = 0; + time_state->tsc_aux.hi = 0; + + if (!one_time) { + init_vm_time(info->vm_info); + one_time = 1; + } } + + + + + + + + + diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index 18d183b..7cea8a1 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -225,6 +225,10 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) vmx_state->pri_proc_ctrls.invlpg_exit = 1; vmx_state->pri_proc_ctrls.use_msr_bitmap = 1; vmx_state->pri_proc_ctrls.pause_exit = 1; + vmx_state->pri_proc_ctrls.tsc_offset = 1; +#ifdef CONFIG_TIME_VIRTUALIZE_TSC + vmx_state->pri_proc_ctrls.rdtsc_exit = 1; +#endif vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data)); vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, @@ -636,13 +640,12 @@ static void print_exit_log(struct guest_info * info) { */ int v3_vmx_enter(struct guest_info * info) { int ret = 0; - uint64_t tmp_tsc = 0; + uint32_t tsc_offset_low, tsc_offset_high; struct vmx_exit_info exit_info; // Conditionally yield the CPU if the timeslice has expired v3_yield_cond(info); - // v3_print_guest_state(info); // disable global interrupts for vm state transition @@ -665,10 +668,16 @@ int v3_vmx_enter(struct guest_info * info) { vmcs_write(VMCS_GUEST_CR3, guest_cr3); } - // We do timer injection here to track real host time. 
- rdtscll(tmp_tsc); - v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc); - rdtscll(info->time_state.cached_host_tsc); + v3_update_timers(info); + + /* If this guest is frequency-lagged behind host time, wait + * for the appropriate host time before resuming the guest. */ + v3_adjust_time(info); + + tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff); + tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff); + check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high); + check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low); if (info->vm_info->run_state == VM_STOPPED) { info->vm_info->run_state = VM_RUNNING; @@ -688,12 +697,8 @@ int v3_vmx_enter(struct guest_info * info) { return -1; } - // rdtscll(tmp_tsc); - // v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc); - info->num_exits++; - /* Update guest state */ v3_vmx_save_vmcs(info); @@ -741,11 +746,38 @@ int v3_vmx_enter(struct guest_info * info) { int v3_start_vmx_guest(struct guest_info* info) { + PrintDebug("Starting VMX core %u\n",info->cpu_id); + if (info->cpu_mode==INIT) { + PrintDebug("VMX core %u: I am an AP in INIT mode, waiting for that to change\n",info->cpu_id); + while (info->cpu_mode==INIT) { + v3_yield(info); + //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id); + } + PrintDebug("VMX core %u: I am out of INIT\n",info->cpu_id); + if (info->cpu_mode==SIPI) { + PrintDebug("VMX core %u: I am waiting on a SIPI to set my starting address\n",info->cpu_id); + while (info->cpu_mode==SIPI) { + v3_yield(info); + //PrintDebug("VMX core %u: still waiting for SIPI\n",info->cpu_id); + } + } + PrintDebug("VMX core %u: I have my SIPI\n", info->cpu_id); + } + + if (info->cpu_mode!=REAL) { + PrintError("VMX core %u: I am not in REAL mode at launch! 
Huh?!\n", info->cpu_id); + return -1; + } + + PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", + info->cpu_id, info->segments.cs.selector, (void*)(info->segments.cs.base), + info->segments.cs.limit,(void*)(info->rip)); + - PrintDebug("Launching VMX guest\n"); - rdtscll(info->time_state.cached_host_tsc); + PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id); + v3_start_time(info); while (1) { if (v3_vmx_enter(info) == -1) { diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c index d88210d..357f0d2 100644 --- a/palacios/src/palacios/vmx_handler.c +++ b/palacios/src/palacios/vmx_handler.c @@ -99,6 +99,18 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf } break; + + case VMEXIT_RDTSC: +#ifdef CONFIG_DEBUG_TIME + PrintDebug("RDTSC\n"); +#endif + if (v3_handle_rdtsc(info) == -1) { + PrintError("Error Handling RDTSC instruction\n"); + return -1; + } + + break; + case VMEXIT_CPUID: if (v3_handle_cpuid(info) == -1) { PrintError("Error Handling CPUID instruction\n");