#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
+#include <palacios/vmm_timeout.h>
#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
return 1;
}
+
+
+
return 0;
}
return (addr_t)V3_PAddr((void *)vmcs_page);
}
+/*
+
+static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
+ struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
+ V3_Print("\n\nEFER READ\n");
+
+ v3_print_guest_state(core);
+
+ src->value = efer->value;
+ return 0;
+}
+
+static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
+ struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
+ V3_Print("\n\nEFER WRITE\n");
+
+ v3_print_guest_state(core);
+
+ efer->value = src.value;
+ {
+ struct vmx_data * vmx_state = core->vmm_data;
+
+ V3_Print("Trapping page faults and GPFs\n");
+ vmx_state->excp_bmap.pf = 1;
+ vmx_state->excp_bmap.gp = 1;
+
+ check_vmcs_write(VMCS_EXCP_BITMAP, vmx_state->excp_bmap.value);
+ }
+
+ return 0;
+}
+*/
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
int vmx_ret = 0;
+ /* Get Available features */
+ struct vmx_pin_ctrls avail_pin_ctrls;
+ avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
+ /* ** */
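+ /* The "available features" value above reports which pin-based controls
+  * this hardware allows to be set, derived from the allowed-0/allowed-1
+  * settings in the IA32_VMX_*_CTLS capability MSRs. */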
+
+
// disable global interrupts for vm state initialization
v3_disable_ints();
vmx_state->pin_ctrls.ext_int_exit = 1;
+ /* We enable the preemption timer by default so that we can measure guest time accurately */
+ if (avail_pin_ctrls.active_preempt_timer) {
+ V3_Print("VMX Preemption Timer is available\n");
+ vmx_state->pin_ctrls.active_preempt_timer = 1;
+ vmx_state->exit_ctrls.save_preempt_timer = 1;
+ }
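+ /* The save_preempt_timer exit control set above makes the hardware save
+  * the remaining timer value in the VMCS on VM exit, so it can be read
+  * back afterwards to compute how long the guest actually ran. */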
+
vmx_state->pri_proc_ctrls.hlt_exit = 1;
vmx_state->exit_ctrls.host_64_on = 1;
#endif
- // Hook all accesses to EFER register
- v3_hook_msr(core->vm_info, EFER_MSR,
- &v3_handle_efer_read,
- &v3_handle_efer_write,
- core);
+
// Restore host's EFER register on each VM EXIT
vmx_state->exit_ctrls.ld_efer = 1;
vmx_state->exit_ctrls.save_efer = 1;
vmx_state->entry_ctrls.ld_efer = 1;
- // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
- vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+ vmx_state->exit_ctrls.save_pat = 1;
+ vmx_state->exit_ctrls.ld_pat = 1;
+ vmx_state->entry_ctrls.ld_pat = 1;
+
+ /* Temporary GPF trap */
+ // vmx_state->excp_bmap.gp = 1;
+ // Set up the guest's initial PAT field
+ vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
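+ // (0x0007040600070406 is the architectural power-on default PAT:
+ //  entries WB, WT, UC-, UC, repeated for the upper four entries)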
/* Setup paging */
if (core->shdw_pg_mode == SHADOW_PAGING) {
#define CR0_WP 0x00010000 // To ensure mem hooks work
vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));
+
+ // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
+ vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+
core->ctrl_regs.cr3 = core->direct_map_pt;
// vmx_state->pinbased_ctrls |= NMI_EXIT;
// Setup VMX Assist
v3_vmxassist_init(core, vmx_state);
+ // Hook all accesses to EFER register
+ v3_hook_msr(core->vm_info, EFER_MSR,
+ &v3_handle_efer_read,
+ &v3_handle_efer_write,
+ core);
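+ // (Under shadow paging, EFER is virtualized in software so the VMM can
+ // track the guest's long-mode transitions; the nested-paging cases below
+ // hook it as passthrough instead.)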
+
} else if ((core->shdw_pg_mode == NESTED_PAGING) &&
(v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_CPU)) {
// vmx_state->pinbased_ctrls |= NMI_EXIT;
+ // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
+ vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
+
/* Disable CR exits */
vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
vmx_state->pri_proc_ctrls.cr3_str_exit = 0;
return -1;
}
+ // Hook all accesses to EFER register
+ v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
+
} else if ((core->shdw_pg_mode == NESTED_PAGING) &&
(v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
int i = 0;
core->rip = 0xfff0;
core->vm_regs.rdx = 0x00000f00;
core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
- core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
-
+ core->ctrl_regs.cr0 = 0x00000030; // CR0.ET and CR0.NE set; PE/PG clear so the guest starts in real mode
+ core->ctrl_regs.cr4 = 0x00002010; // Enable the VMX and PSE flags
+
core->segments.cs.selector = 0xf000;
core->segments.cs.limit = 0xffff;
core->segments.ldtr.selector = 0x0000;
core->segments.ldtr.limit = 0x0000ffff;
core->segments.ldtr.base = 0x0000000000000000LL;
- core->segments.ldtr.type = 2;
+ core->segments.ldtr.type = 0x2;
core->segments.ldtr.present = 1;
core->segments.tr.selector = 0x0000;
vmx_state->pri_proc_ctrls.invlpg_exit = 0;
+ // Cause VM_EXIT whenever the CR4.VMXE bit is set
+ vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
+
+
if (v3_init_ept(core, &hw_info) == -1) {
PrintError("Error initializing EPT\n");
return -1;
}
+ // Hook all accesses to EFER register
+ //v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
+ v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
} else {
PrintError("Invalid Virtual paging mode\n");
return -1;
// save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
{
-#define IA32_STAR 0xc0000081
-#define IA32_LSTAR 0xc0000082
-#define IA32_FMASK 0xc0000084
-#define IA32_KERN_GS_BASE 0xc0000102
-#define IA32_CSTAR 0xc0000083 // Compatibility mode STAR (ignored for now... hopefully its not that important...)
-
- int msr_ret = 0;
-
- struct vmcs_msr_entry * exit_store_msrs = NULL;
- struct vmcs_msr_entry * exit_load_msrs = NULL;
- struct vmcs_msr_entry * entry_load_msrs = NULL;;
+ struct vmcs_msr_save_area * msr_entries = NULL;
int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
+ int msr_ret = 0;
V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);
return -1;
}
- vmx_state->msr_area = V3_VAddr(V3_AllocPages(1));
-
- if (vmx_state->msr_area == NULL) {
+ vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);
+
+ if (vmx_state->msr_area_paddr == (addr_t)NULL) {
PrintError("could not allocate msr load/store area\n");
return -1;
}
+ msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
+ vmx_state->msr_area = msr_entries; // cache in vmx_info
+
+ memset(msr_entries, 0, PAGE_SIZE);
+
+ msr_entries->guest_star.index = IA32_STAR_MSR;
+ msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
+ msr_entries->guest_fmask.index = IA32_FMASK_MSR;
+ msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;
+
+ msr_entries->host_star.index = IA32_STAR_MSR;
+ msr_entries->host_lstar.index = IA32_LSTAR_MSR;
+ msr_entries->host_fmask.index = IA32_FMASK_MSR;
+ msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;
+
msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);
-
-
- exit_store_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area);
- exit_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 4));
- entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));
+ msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
+ msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
+ msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));
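+ /* Per the VMX MSR-area semantics: the exit-store area receives the guest's
+  * MSR values on VM exit, the entry-load area supplies them on VM entry, and
+  * the exit-load area restores the host's values on VM exit. */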
- exit_store_msrs[0].index = IA32_STAR;
- exit_store_msrs[1].index = IA32_LSTAR;
- exit_store_msrs[2].index = IA32_FMASK;
- exit_store_msrs[3].index = IA32_KERN_GS_BASE;
-
- memcpy(exit_store_msrs, exit_load_msrs, sizeof(struct vmcs_msr_entry) * 4);
- memcpy(exit_store_msrs, entry_load_msrs, sizeof(struct vmcs_msr_entry) * 4);
-
- v3_get_msr(IA32_STAR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
- v3_get_msr(IA32_LSTAR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
- v3_get_msr(IA32_FMASK, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
- v3_get_msr(IA32_KERN_GS_BASE, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
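+ // Hooking with NULL handlers should make these MSRs passthrough for the
+ // guest; that is safe here because the load/store area above context
+ // switches them on every entry/exit.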
+
+
+ // IMPORTANT: These MSRs appear to be cached by the hardware....
+ msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
+
+ msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
+ msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);
+
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);
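+ // PAT, like the sysenter and FS/GS base MSRs, has dedicated VMCS fields;
+ // it is context switched by the ld_pat/save_pat controls set up earlier.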
+
+ // Not sure what to do about this... CSTAR does not appear to have a hardware-cached (VMCS) version like the MSRs above...
+ msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
+
+ if (msr_ret != 0) {
+ PrintError("Error configuring MSR save/restore area\n");
+ return -1;
+ }
- msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
- msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
- msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));
}
return -1;
}
+ /*
if (v3_update_vmcs_host_state(core)) {
PrintError("Could not write host state\n");
return -1;
}
+ */
// reenable global interrupts for vm state initialization now
// that the vm state is initialized. If another VM kicks us off,
static struct vmx_exit_info exit_log[10];
+static uint64_t rip_log[10];
+
+
static void print_exit_log(struct guest_info * info) {
int cnt = info->num_exits % 10;
V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
+ V3_Print("\tguest_linear_addr= %p\n", (void *)(addr_t)tmp->guest_linear_addr);
+ V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);
+
cnt--;
}
+
+
/*
* CAUTION and DANGER!!!
*
*/
int v3_vmx_enter(struct guest_info * info) {
int ret = 0;
- //uint32_t tsc_offset_low, tsc_offset_high;
+ sint64_t tsc_offset;
+ uint32_t tsc_offset_low, tsc_offset_high;
struct vmx_exit_info exit_info;
struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
+ uint64_t guest_cycles = 0;
// Conditionally yield the CPU if the timeslice has expired
v3_yield_cond(info);
- // Perform any additional yielding needed for time adjustment
- v3_adjust_time(info);
-
// disable global interrupts for vm state transition
v3_disable_ints();
// Update timer devices late after being in the VM so that as much
- // of hte time in the VM is accounted for as possible. Also do it before
+ // of the time in the VM is accounted for as possible. Also do it before
// updating IRQ entry state so that any interrupts the timers raise get
// handled on the next VM entry. Must be done with interrupts disabled.
+ v3_advance_time(info);
v3_update_timers(info);
if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
vmcs_write(VMCS_GUEST_CR3, guest_cr3);
}
+
// Perform last-minute time bookkeeping prior to entering the VM
v3_time_enter_vm(info);
+
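+ // Program the hardware TSC offset: a signed value the CPU adds to the
+ // host TSC whenever the guest reads the TSC. The 64-bit VMCS field is
+ // written as two 32-bit halves below.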
+ tsc_offset = v3_tsc_host_offset(&info->time_state);
+ tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
+ tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);
+
+ check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+ check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
- // tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
- // tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
- // check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
- // check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
+
if (v3_update_vmcs_host_state(info)) {
v3_enable_ints();
PrintError("Could not write host state\n");
return -1;
}
+
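+ // Arm the preemption timer with the number of cycles until the next
+ // timeout (or the maximum window). The timer counts down while the guest
+ // runs and forces a VM exit when it reaches zero.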
+ if (vmx_info->pin_ctrls.active_preempt_timer) {
+ /* Preemption timer is active */
+ uint32_t preempt_window = 0xffffffff;
+ if (info->timeouts.timeout_active) {
+ preempt_window = info->timeouts.next_timeout;
+ }
+
+ check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
+ }
+
- if (vmx_info->state == VMX_UNLAUNCHED) {
- vmx_info->state = VMX_LAUNCHED;
+ {
+ uint64_t entry_tsc = 0;
+ uint64_t exit_tsc = 0;
- info->vm_info->run_state = VM_RUNNING;
- ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
- } else {
- V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
- ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
- }
-
+ if (vmx_info->state == VMX_UNLAUNCHED) {
+ vmx_info->state = VMX_LAUNCHED;
+ rdtscll(entry_tsc);
+ ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
+ rdtscll(exit_tsc);
+
+ } else {
+ V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
+ rdtscll(entry_tsc);
+ ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
+ rdtscll(exit_tsc);
+ }
+ guest_cycles = exit_tsc - entry_tsc;
+ }
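+ // guest_cycles now holds a raw rdtsc-based measurement of the time spent
+ // in the guest, including entry/exit overhead; it is refined below with
+ // the preemption timer when that is available.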
// PrintDebug("VMX Exit: ret=%d\n", ret);
}
+ info->num_exits++;
+
+ /* If we have the preemption timer, then use it to get a more accurate count of guest cycles */
+ if (vmx_info->pin_ctrls.active_preempt_timer) {
+ uint32_t cycles_left = 0;
+ check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));
+
+ if (info->timeouts.timeout_active) {
+ guest_cycles = info->timeouts.next_timeout - cycles_left;
+ } else {
+ guest_cycles = 0xffffffff - cycles_left;
+ }
+ }
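+ // The preemption timer only counts down in VMX non-root operation, so
+ // (assuming it ticks at the TSC rate) this yields a guest-only cycle
+ // count that excludes VMM entry/exit overhead.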
// Immediate exit from VM time bookkeeping
- v3_time_exit_vm(info);
+ v3_time_exit_vm(info, &guest_cycles);
- info->num_exits++;
/* Update guest state */
v3_vmx_save_vmcs(info);
info->cpu_mode = v3_get_vm_cpu_mode(info);
+
check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
//PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
exit_log[info->num_exits % 10] = exit_info;
+ // Log the guest's linear RIP alongside the exit info for debugging
+ rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));
#ifdef V3_CONFIG_SYMCALL
if (info->sym_core_state.symcall_state.sym_call_active == 0) {
return -1;
}
+ if (info->timeouts.timeout_active) {
+ /* Check to see if any timeouts have expired */
+ v3_handle_timeouts(info, guest_cycles);
+ }
+
return 0;
}
if (info->vcpu_id == 0) {
info->core_run_state = CORE_RUNNING;
- info->vm_info->run_state = VM_RUNNING;
} else {
PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);
while (info->core_run_state == CORE_STOPPED) {
+
+ if (info->vm_info->run_state == VM_STOPPED) {
+ // The VM was stopped before this core was initialized.
+ return 0;
+ }
+
v3_yield(info);
//PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
}
void v3_init_vmx_cpu(int cpu_id) {
addr_t vmx_on_region = 0;
+ extern v3_cpu_arch_t v3_mach_type;
- if (cpu_id == 0) {
+ if (v3_mach_type == V3_INVALID_CPU) { // first core to initialize probes the global VMX feature info
if (v3_init_vmx_hw(&hw_info) == -1) {
PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
return;
v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
}
}
+
}