#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
+#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
-static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+#include <palacios/vmx_hw_info.h>
+
+#ifndef CONFIG_DEBUG_VMX
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+/* These fields contain the hardware feature sets supported by the local CPU */
+static struct vmx_hw_info hw_info;
+
+
+static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
int ret = 0;
- ret = vmcs_write(field,val);
+ ret = vmcs_write(field, val);
if (ret != VMX_SUCCESS) {
PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
return ret;
}
-#if 0
-// For the 32 bit reserved bit fields
-// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
-static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
- v3_msr_t mask_msr;
-
- PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
-
- v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
-
- PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);
-
- val |= mask_msr.lo;
- val |= mask_msr.hi;
-
- return val;
-}
-
-
-
-static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
- v3_msr_t msr0, msr1;
- addr_t msr0_val, msr1_val;
-
- PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
-
- v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
- v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
-
- // This generates a mask that is the natural bit width of the CPU
- msr0_val = msr0.value;
- msr1_val = msr1.value;
-
- PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);
-
- val |= msr0_val;
- val |= msr1_val;
-
- return val;
-}
-
-
-#endif
+/* Allocate one zeroed host page to serve as a VMCS region, stamp it
+ * with the VMCS revision identifier required by the local CPU (now
+ * read once into hw_info at init time instead of re-reading the
+ * VMX_BASIC MSR on every allocation), and return the region's host
+ * PHYSICAL address -- the form VMPTRLD/VMCLEAR expect. */
static addr_t allocate_vmcs() {
- reg_ex_t msr;
struct vmcs_data * vmcs_page = NULL;
PrintDebug("Allocating page\n");
vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
memset(vmcs_page, 0, 4096);
- v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
-
- vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
- PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
+ vmcs_page->revision = hw_info.basic_info.revision;
+ PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
return (addr_t)V3_PAddr((void *)vmcs_page);
}
static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
int vmx_ret = 0;
+ struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
+
+ // disable global interrupts for vm state initialization
+ v3_disable_ints();
PrintDebug("Loading VMCS\n");
vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
+ active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+ vmx_state->state = VMX_UNLAUNCHED;
if (vmx_ret != VMX_SUCCESS) {
PrintError("VMPTRLD failed\n");
/********** Setup and VMX Control Fields from MSR ***********/
/* Setup IO map */
- v3_init_vmx_io_map(info);
- v3_init_vmx_msr_map(info);
+
struct v3_msr tmp_msr;
vmx_state->pri_proc_ctrls.invlpg_exit = 1;
vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
vmx_state->pri_proc_ctrls.pause_exit = 1;
+ vmx_state->pri_proc_ctrls.tsc_offset = 1;
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+ vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
+#endif
- vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
+ vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
- (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB);
+ (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
+
- vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));
+ vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));
v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
vmx_state->exit_ctrls.value = tmp_msr.lo;
#define VMXASSIST_GDT 0x10000
addr_t vmxassist_gdt = 0;
- if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
+ if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
PrintError("Could not find VMXASSIST GDT destination\n");
return -1;
}
extern uint8_t v3_vmxassist_end[];
addr_t vmxassist_dst = 0;
- if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
+ if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
PrintError("Could not find VMXASSIST destination\n");
return -1;
}
info->dbg_regs.dr7 = 0x400;
+#ifdef __V3_64BIT__
vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
-
+#else
+ vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
+ vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
+#endif
if (v3_update_vmcs_ctrl_fields(info)) {
PrintError("Could not write control fields!\n");
}
- vmx_state->state = VMXASSIST_DISABLED;
+ vmx_state->assist_state = VMXASSIST_DISABLED;
+
+ // reenable global interrupts for vm state initialization now
+ // that the vm state is initialized. If another VM kicks us off,
+ // it'll update our vmx state so that we know to reload ourself
+ v3_enable_ints();
return 0;
}
PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
info->vmm_data = vmx_state;
+ vmx_state->state = VMX_UNLAUNCHED;
PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
return 0;
}
+
+/* Tear down one core's VMX state created at init time: release the
+ * core's VMCS page, then free the vmx_data bookkeeping structure
+ * itself.  Always returns 0. */
+int v3_deinit_vmx_vmcs(struct guest_info * core) {
+    struct vmx_data * vmx_state = core->vmm_data;
+
+    /* vmcs_ptr_phys is a host physical address (see allocate_vmcs),
+     * which is the form V3_FreePages() takes.
+     * NOTE(review): no VMCLEAR is issued before freeing -- assumes
+     * this VMCS is no longer active/current on any CPU; confirm. */
+    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
+
+    V3_Free(vmx_state);
+
+    return 0;
+}
+
+
static int update_irq_exit_state(struct guest_info * info) {
struct vmx_exit_idt_vec_info idt_vec_info;
check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
- if ((info->intr_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
+ if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
PrintDebug("Calling v3_injecting_intr\n");
#endif
- info->intr_state.irq_started = 0;
- v3_injecting_intr(info, info->intr_state.irq_vector, V3_EXTERNAL_IRQ);
+ info->intr_core_state.irq_started = 0;
+ v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
}
return 0;
static int update_irq_entry_state(struct guest_info * info) {
struct vmx_exit_idt_vec_info idt_vec_info;
- struct vmcs_interrupt_state intr_state;
+ struct vmcs_interrupt_state intr_core_state;
struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
- check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_state));
+ check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));
/* Check for pending exceptions to inject */
if (v3_excp_pending(info)) {
int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
- PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)info->rip);
+ PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);
v3_injecting_excp(info, int_info.vector);
} else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) &&
- (intr_state.val == 0)) {
+ (intr_core_state.val == 0)) {
- if ((info->intr_state.irq_started == 1) && (idt_vec_info.valid == 1)) {
+ if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
PrintDebug("IRQ pending from previous injection\n");
switch (v3_intr_pending(info)) {
case V3_EXTERNAL_IRQ: {
- info->intr_state.irq_vector = v3_get_intr(info);
- ent_int.vector = info->intr_state.irq_vector;
+ info->intr_core_state.irq_vector = v3_get_intr(info);
+ ent_int.vector = info->intr_core_state.irq_vector;
ent_int.type = 0;
ent_int.error_code = 0;
ent_int.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n",
- info->intr_state.irq_vector,
+ info->intr_core_state.irq_vector,
(uint32_t)info->num_exits,
- (void *)info->rip);
+ (void *)(addr_t)info->rip);
#endif
check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
- info->intr_state.irq_started = 1;
+ info->intr_core_state.irq_started = 1;
break;
}
*/
int v3_vmx_enter(struct guest_info * info) {
int ret = 0;
- uint64_t tmp_tsc = 0;
+ uint32_t tsc_offset_low, tsc_offset_high;
struct vmx_exit_info exit_info;
+ struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
// Conditionally yield the CPU if the timeslice has expired
v3_yield_cond(info);
+ // Perform any additional yielding needed for time adjustment
+ v3_adjust_time(info);
- // v3_print_guest_state(info);
+ // Update timer devices prior to entering VM.
+ v3_update_timers(info);
// disable global interrupts for vm state transition
v3_disable_ints();
+
+ if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
+ vmcs_load(vmx_info->vmcs_ptr_phys);
+ active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+ }
+
+
v3_vmx_restore_vmcs(info);
-#ifdef CONFIG_SYMBIOTIC
- if (info->sym_state.sym_call_active == 0) {
+#ifdef CONFIG_SYMCALL
+ if (info->sym_core_state.symcall_state.sym_call_active == 0) {
update_irq_entry_state(info);
}
#else
vmcs_write(VMCS_GUEST_CR3, guest_cr3);
}
- rdtscll(info->time_state.cached_host_tsc);
+ // Perform last-minute time bookkeeping prior to entering the VM
+ v3_time_enter_vm(info);
- if (info->run_state == VM_STOPPED) {
- info->run_state = VM_RUNNING;
+ tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
+ tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
+ check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+ check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
+
+
+ if (vmx_info->state == VMX_UNLAUNCHED) {
+ vmx_info->state = VMX_LAUNCHED;
+ info->vm_info->run_state = VM_RUNNING;
ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
} else {
+ V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
}
-
+
// PrintDebug("VMX Exit: ret=%d\n", ret);
if (ret != VMX_SUCCESS) {
return -1;
}
- rdtscll(tmp_tsc);
+ // Immediate exit from VM time bookkeeping
+ v3_time_exit_vm(info);
info->num_exits++;
- v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
/* Update guest state */
v3_vmx_save_vmcs(info);
exit_log[info->num_exits % 10] = exit_info;
-#ifdef CONFIG_SYMBIOTIC
- if (info->sym_state.sym_call_active == 0) {
+#ifdef CONFIG_SYMCALL
+ if (info->sym_core_state.symcall_state.sym_call_active == 0) {
update_irq_exit_state(info);
}
#else
update_irq_exit_state(info);
#endif
+ if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
+ // This is a special case whose only job is to inject an interrupt
+ vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
+ vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
+ vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
+
+#ifdef CONFIG_DEBUG_INTERRUPTS
+ PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
+#endif
+ }
+
// reenable global interrupts after vm exit
v3_enable_ints();
}
-int v3_start_vmx_guest(struct guest_info* info) {
+int v3_start_vmx_guest(struct guest_info * info) {
+
+ PrintDebug("Starting VMX core %u\n", info->cpu_id);
+
+ if (info->cpu_id == 0) {
+ info->core_run_state = CORE_RUNNING;
+ info->vm_info->run_state = VM_RUNNING;
+ } else {
+
+ PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
+
+ while (info->core_run_state == CORE_STOPPED) {
+ v3_yield(info);
+ //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
+ }
+
+ PrintDebug("VMX core %u initialized\n", info->cpu_id);
+ }
- PrintDebug("Launching VMX guest\n");
+ PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
+ info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
+ info->segments.cs.limit, (void *)(info->rip));
- rdtscll(info->time_state.cached_host_tsc);
+ PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
+
+ v3_start_time(info);
while (1) {
+
+ if (info->vm_info->run_state == VM_STOPPED) {
+ info->core_run_state = CORE_STOPPED;
+ break;
+ }
+
if (v3_vmx_enter(info) == -1) {
v3_print_vmcs();
print_exit_log(info);
return -1;
}
+
+
+ if (info->vm_info->run_state == VM_STOPPED) {
+ info->core_run_state = CORE_STOPPED;
+ break;
+ }
/*
if ((info->num_exits % 5000) == 0) {
V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
}
+
+
+#define VMX_FEATURE_CONTROL_MSR 0x0000003a
+#define CPUID_VMX_FEATURES 0x00000005 /* LOCK and VMXON */
+#define CPUID_1_ECX_VTXFLAG 0x00000020
+
int v3_is_vmx_capable() {
v3_msr_t feature_msr;
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
- if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
+ if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
PrintDebug("VMX is locked -- enable in the BIOS\n");
return 0;
}
return 1;
}
-static int has_vmx_nested_paging() {
- return 0;
-}
+
+
+/* Per-CPU VMX bring-up.  On the boot CPU (cpu_id == 0) this first
+ * probes and caches the hardware VMX feature set into the file-global
+ * hw_info; every CPU then enables VMX operation and executes VMXON
+ * with its own VMXON region, finally recording itself as a
+ * VMX-capable core in v3_cpu_types[]. */
void v3_init_vmx_cpu(int cpu_id) {
extern v3_cpu_arch_t v3_cpu_types[];
- struct v3_msr tmp_msr;
- uint64_t ret = 0;
- v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
-
- __asm__ __volatile__ (
- "movq %%cr4, %%rbx;"
- "orq $0x00002000, %%rbx;"
- "movq %%rbx, %0;"
- : "=m"(ret)
- :
- : "%rbx"
- );
-
- if ((~ret & tmp_msr.value) == 0) {
- __asm__ __volatile__ (
- "movq %0, %%cr4;"
- :
- : "q"(ret)
- );
- } else {
- PrintError("Invalid CR4 Settings!\n");
- return;
+ if (cpu_id == 0) {
+ if (v3_init_vmx_hw(&hw_info) == -1) {
+ PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
+ return;
+ }
}
- __asm__ __volatile__ (
- "movq %%cr0, %%rbx; "
- "orq $0x00000020,%%rbx; "
- "movq %%rbx, %%cr0;"
- :
- :
- : "%rbx"
- );
- //
- // Should check and return Error here....
+
+ // enable_vmx() subsumes the hand-rolled CR4/CR0 bit twiddling
+ // removed above (CR4.VMXE / CR0 setup).
+ enable_vmx();
// Setup VMXON Region
+ // NOTE(review): host_vmcs_ptrs[cpu_id] must already hold this CPU's
+ // VMXON region (a physical address); its allocation is not visible
+ // in this hunk -- confirm it happens before v3_init_vmx_cpu runs.
PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
- if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
+ if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
PrintDebug("VMX Enabled\n");
} else {
PrintError("VMX initialization failure\n");
}
- if (has_vmx_nested_paging() == 1) {
- v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
- } else {
- v3_cpu_types[cpu_id] = V3_VMX_CPU;
- }
+ v3_cpu_types[cpu_id] = V3_VMX_CPU;
+
}
+
+/* Per-CPU VMX teardown: mark this CPU as no longer VMX-managed and
+ * free its VMXON region page. */
+void v3_deinit_vmx_cpu(int cpu_id) {
+    extern v3_cpu_arch_t v3_cpu_types[];
+    v3_cpu_types[cpu_id] = V3_INVALID_CPU;
+    /* NOTE(review): the VMXON page is freed without executing VMXOFF
+     * first -- assumes the CPU has already left VMX operation (or is
+     * being taken fully offline) before this runs; confirm. */
+    V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
+}