+struct vmx_intr_state { /* 32-bit word exposed both as a raw value and as named flag bits.
+                         * Bit numbers below follow declaration order, lowest bit first
+                         * (GCC bit-field allocation on x86).
+                         * NOTE(review): field names presumably mirror the VMCS guest
+                         * interruptibility-state bits -- confirm against the Intel SDM. */
+ union {
+ uint32_t value; /* the whole word, for reading/writing in one access */
+ struct {
+ uint8_t block_sti : 1; /* bit 0 */
+ uint8_t block_mov_ss : 1; /* bit 1 */
+ uint8_t block_smi : 1; /* bit 2 */
+ uint8_t block_nmi : 1; /* bit 3 */
+ uint32_t rsvd : 28; /* bits 31:4, pads the flags out to 32 bits */
+ } __attribute__((packed));
+ } __attribute__((packed));
+} __attribute__((packed));
+
+
+struct vmx_pending_dbg_excps { /* 64-bit word with three overlaid views: raw value,
+                                * two 32-bit halves, and named flag bits.
+                                * Bit numbers follow declaration order, lowest bit first
+                                * (GCC bit-field allocation on x86).
+                                * NOTE(review): presumably the VMCS "pending debug
+                                * exceptions" guest field -- confirm against the SDM. */
+ union {
+ uint64_t value; /* the whole word */
+
+ struct {
+ uint32_t lo; /* bits 31:0 of value */
+ uint32_t hi; /* bits 63:32 of value */
+ } __attribute__((packed));
+
+ struct {
+ uint8_t b0 : 1; /* bit 0 */
+ uint8_t b1 : 1; /* bit 1 */
+ uint8_t b2 : 1; /* bit 2 */
+ uint8_t b3 : 1; /* bit 3 */
+ uint8_t rsvd1 : 8; /* bits 11:4 */
+ uint8_t bp_set : 1; /* bit 12 */
+ uint8_t rsvd2 : 1; /* bit 13 */
+ uint8_t bp_ss : 1; /* bit 14 */
+ uint64_t rsvd3 : 49; /* bits 63:15, pads the flags out to 64 bits */
+ } __attribute__((packed));
+ } __attribute__((packed));
+} __attribute__((packed));
/* Segment Selector Access Rights (32 bits) */
/* INTEL Manual: 20-4 vol 3B */
// Maybe make this a define....
-typedef enum v3_cpu_arch {V3_INVALID_CPU, V3_SVM_CPU, V3_SVM_REV3_CPU, V3_VMX_CPU, V3_VMX_EPT_CPU} v3_cpu_arch_t;
+typedef enum v3_cpu_arch {V3_INVALID_CPU, V3_SVM_CPU, V3_SVM_REV3_CPU, V3_VMX_CPU, V3_VMX_EPT_CPU, V3_VMX_EPT_UG_CPU} v3_cpu_arch_t;
v3_cpu_mode_t v3_get_host_cpu_mode();
void PrintPTEntry(struct guest_info * info, page_type_t type, addr_t vaddr, void * entry);
-void PrintHostPageTables(struct guest_info * info, addr_t cr3);
+void PrintHostPageTables(struct guest_info * info, v3_cpu_mode_t cpu_mode, addr_t cr3);
void PrintGuestPageTables(struct guest_info * info, addr_t cr3);
void PrintHostPageTree(struct guest_info * info, addr_t virtual_addr, addr_t cr3);
void PrintGuestPageTree(struct guest_info * info, addr_t virtual_addr, addr_t cr3);
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
* All rights reserved.
*
- * Author: Peter Dinda <pdinda@northwestern.edu>
* Author: Jack Lange <jarusl@cs.northwestern.edu>
*
* This is free software. You are permitted to use,
#define VMM_ERROR 3
-
-
-
struct vmx_pin_ctrls {
union {
uint32_t value;
uint_t enable_rdtscp : 1;
uint_t virt_x2apic : 1;
uint_t enable_vpid : 1;
- uint_t unrstrct_guest : 1;
+ uint_t wbinvd_exit : 1;
+ uint_t unrstrct_guest : 1; /* un restricted guest (CAN RUN IN REAL MODE) */
uint_t rsvd1 : 2;
uint_t pause_loop_exit : 1;
uint_t rsvd2 : 21;
uint_t zero4 : 5;
uint_t rsvd2 : 19;
#endif
-}__attribute__((packed));
+} __attribute__((packed));
struct vmcs_host_state {
struct v3_segment gdtr;
-
-
struct vmx_data {
vmx_state_t state;
vmxassist_state_t assist_state;
struct vmcs_host_state host_state;
- addr_t vmcs_ptr_phys;
- uint8_t ia32e_avail;
+
+ addr_t vmcs_ptr_phys;
v3_reg_t guest_cr4; /// corresponds to the CR4 Read shadow
-/*
- * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
*
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
*
- * Leendert van Doorn, leendert@watson.ibm.com
- * Copyright (c) 2005, International Business Machines Corporation.
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
#ifndef _VMX_ASSIST_H_
#ifdef __V3VEE__
#include <palacios/vm_guest.h>
+#include <palacios/vmx.h>
-#define VMXASSIST_BASE 0xD0000
-#define VMXASSIST_MAGIC 0x17101966
-struct vmx_assist_header {
- uint64_t rsvd; // 8 bytes of nothing
- uint32_t magic;
- uint32_t new_ctx_gpa;
- uint32_t old_ctx_gpa;
-} __attribute__((packed));
-
-
-union vmcs_arbytes {
- struct arbyte_fields {
- unsigned int seg_type : 4,
- s : 1,
- dpl : 2,
- p : 1,
- reserved0 : 4,
- avl : 1,
- reserved1 : 1,
- default_ops_size: 1,
- g : 1,
- null_bit : 1,
- reserved2 : 15;
- } __attribute__((packed)) fields;
- unsigned int bytes;
-} __attribute__((packed));
-
-struct vmx_assist_segment {
- uint32_t sel;
- uint32_t limit;
- uint32_t base;
- union vmcs_arbytes arbytes;
-} __attribute__((packed));
-
-/*
- * World switch state
- */
-struct vmx_assist_context {
- uint32_t eip; /* execution pointer */
- uint32_t esp; /* stack pointer */
- uint32_t eflags; /* flags register */
- uint32_t cr0;
- uint32_t cr3; /* page table directory */
- uint32_t cr4;
-
- uint32_t idtr_limit; /* idt */
- uint32_t idtr_base;
-
- uint32_t gdtr_limit; /* gdt */
- uint32_t gdtr_base;
-
- struct vmx_assist_segment cs;
- struct vmx_assist_segment ds;
- struct vmx_assist_segment es;
- struct vmx_assist_segment ss;
- struct vmx_assist_segment fs;
- struct vmx_assist_segment gs;
- struct vmx_assist_segment tr;
- struct vmx_assist_segment ldtr;
-
-
- unsigned char rm_irqbase[2];
-} __attribute__((packed));
-
-typedef struct vmx_assist_context vmx_assist_context_t;
int v3_vmxassist_ctx_switch(struct guest_info * info);
-
+int v3_vmxassist_init(struct guest_info * core, struct vmx_data * vmx_state);
#endif
#endif /* _VMX_ASSIST_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
#ifdef __V3VEE__
+#include <palacios/vmx_hw_info.h>
+
/* The actual format of these data structures is specified as being machine
dependent. Thus the lengths of the base address fields are defined as variable.
To be safe we assume the maximum(?) size fields
+
+ From Intel Manual...
+ N is the physical-address width supported by the logical processor. Software can determine a processor's
+ physical-address width by executing CPUID with 80000008H in EAX. The physical address
+ width is returned in bits 7:0 of EAX.
*/
+/* Exit qualification delivered with an EPT-violation VM exit, viewable as
+ * the raw 64-bit value or as individual bits.  Bit numbers follow
+ * declaration order, lowest bit first (GCC bit-field allocation on x86).
+ *
+ * Layout per the Intel SDM "Exit Qualification for EPT Violations" table:
+ * bits 11:9 are reserved and "NMI unblocking due to IRET" is bit 12, so the
+ * pad in front of nmi_unblock must be 3 bits wide.  A 1-bit pad would make
+ * nmi_unblock alias bit 10 and always read a reserved bit.
+ */
+struct ept_exit_qual {
+ union {
+ uint64_t value; /* the whole qualification */
+ struct {
+ uint64_t rd_op : 1; /* bit 0: violating access was a data read */
+ uint64_t wr_op : 1; /* bit 1: violating access was a data write */
+ uint64_t ifetch : 1; /* bit 2: violating access was an instruction fetch */
+ uint64_t present : 1; /* bit 3: EPT entries allowed reads for this address */
+ uint64_t write : 1; /* bit 4: EPT entries allowed writes for this address */
+ uint64_t exec : 1; /* bit 5: EPT entries allowed execution for this address */
+ uint64_t rsvd1 : 1; /* bit 6 */
+ uint64_t addr_valid : 1; /* bit 7: guest linear-address field is valid */
+ uint64_t addr_type : 1; /* bit 8: only meaningful when addr_valid is set */
+ uint64_t rsvd2 : 3; /* bits 11:9 */
+ uint64_t nmi_unblock : 1; /* bit 12: NMI unblocking due to IRET */
+ uint64_t rsvd3 : 51; /* bits 63:13 */
+ } __attribute__((packed));
+ } __attribute__((packed));
+} __attribute__((packed));
+
+
+
+
typedef struct vmx_eptp {
- uint8_t psmt : 3;
- uint8_t pwl1 : 3;
- uint8_t rsvd1 : 6;
- uint64_t pml_base_addr : 39;
+ uint64_t psmt : 3; /* (0=UC, 6=WB) */
+ uint64_t pwl1 : 3; /* 1 less than EPT page-walk length (?)*/
+ uint64_t rsvd1 : 6;
+ uint64_t pml_base_addr : 39;
uint16_t rsvd2 : 13;
} __attribute__((packed)) vmx_eptp_t;
-typedef struct vmx_pml4 {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t rsvd1 : 5;
- uint8_t ignore1 : 4;
+typedef struct ept_pml4 {
+ uint64_t read : 1;
+ uint64_t write : 1;
+ uint64_t exec : 1;
+ uint64_t rsvd1 : 5;
+ uint64_t ignore1 : 4;
uint64_t pdp_base_addr : 39;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pml4_t;
-
-
-typedef struct vmx_pdp_1GB {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t mt : 3;
- uint8_t ipat : 1;
- uint8_t large_page : 1;
- uint8_t ignore1 : 4;
- uint32_t rsvd1 : 18;
- uint32_t page_base_addr : 21;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pdp_1GB_t;
-
-typedef struct vmx_pdp {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t rsvd1 : 4;
- uint8_t large_page : 1;
- uint8_t ignore1 : 4;
- uint32_t page_base_addr : 39;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pdp_t;
-
-
-typedef struct vmx_pde_2MB {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t mt : 3;
- uint8_t ipat : 1;
- uint8_t large_page : 1;
- uint8_t ignore1 : 4;
- uint32_t rsvd1 : 9;
- uint32_t page_base_addr : 30;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pde_2MB_t;
-
-
-typedef struct vmx_pde {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t rsvd1 : 4;
- uint8_t large_page : 1;
- uint8_t ignore1 : 4;
- uint32_t page_base_addr : 39;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pde_t;
-
-
-
-typedef struct vmx_pte {
- uint8_t read : 1;
- uint8_t write : 1;
- uint8_t exec : 1;
- uint8_t mt : 3;
- uint8_t ipat : 1;
- uint8_t ignore1 : 5;
- uint32_t page_base_addr : 39;
- uint8_t rsvd2 : 1;
- uint32_t ignore2 : 12;
-} __attribute__((packed)) vmx_pte_t;
+ uint64_t rsvd2 : 1;
+ uint64_t ignore2 : 12;
+} __attribute__((packed)) ept_pml4_t;
+
+
+typedef struct ept_pdp_1GB { /* 64-bit entry; bit numbers follow declaration order,
+                              * lowest bit first (GCC bit-field allocation on x86).
+                              * NOTE(review): by its name this is the EPT page-directory-
+                              * pointer entry form that maps a 1GB page -- confirm
+                              * field meanings against the Intel SDM. */
+ uint64_t read : 1; /* bit 0 */
+ uint64_t write : 1; /* bit 1 */
+ uint64_t exec : 1; /* bit 2 */
+ uint64_t mt : 3; /* bits 5:3 */
+ uint64_t ipat : 1; /* bit 6 */
+ uint64_t large_page : 1; /* bit 7 */
+ uint64_t ignore1 : 4; /* bits 11:8 */
+ uint64_t rsvd1 : 18; /* bits 29:12 */
+ uint64_t page_base_addr : 21; /* bits 50:30 */
+ uint64_t rsvd2 : 1; /* bit 51 */
+ uint64_t ignore2 : 12; /* bits 63:52 */
+} __attribute__((packed)) ept_pdp_1GB_t;
+
+typedef struct ept_pdp { /* 64-bit entry; bit numbers follow declaration order,
+                          * lowest bit first (GCC bit-field allocation on x86).
+                          * NOTE(review): by its name this is the EPT page-directory-
+                          * pointer entry form that points at a next-level table --
+                          * confirm field meanings against the Intel SDM. */
+ uint64_t read : 1; /* bit 0 */
+ uint64_t write : 1; /* bit 1 */
+ uint64_t exec : 1; /* bit 2 */
+ uint64_t rsvd1 : 4; /* bits 6:3 */
+ uint64_t large_page : 1; /* bit 7 */
+ uint64_t ignore1 : 4; /* bits 11:8 */
+ uint64_t pd_base_addr : 39; /* bits 50:12 */
+ uint64_t rsvd2 : 1; /* bit 51 */
+ uint64_t ignore2 : 12; /* bits 63:52 */
+} __attribute__((packed)) ept_pdp_t;
+
+
+typedef struct ept_pde_2MB { /* 64-bit entry; bit numbers follow declaration order,
+                              * lowest bit first (GCC bit-field allocation on x86).
+                              * NOTE(review): by its name this is the EPT page-directory
+                              * entry form that maps a 2MB page -- confirm field
+                              * meanings against the Intel SDM. */
+ uint64_t read : 1; /* bit 0 */
+ uint64_t write : 1; /* bit 1 */
+ uint64_t exec : 1; /* bit 2 */
+ uint64_t mt : 3; /* bits 5:3 */
+ uint64_t ipat : 1; /* bit 6 */
+ uint64_t large_page : 1; /* bit 7 */
+ uint64_t ignore1 : 4; /* bits 11:8 */
+ uint64_t rsvd1 : 9; /* bits 20:12 */
+ uint64_t page_base_addr : 30; /* bits 50:21 */
+ uint64_t rsvd2 : 1; /* bit 51 */
+ uint64_t ignore2 : 12; /* bits 63:52 */
+} __attribute__((packed)) ept_pde_2MB_t;
+
+
+typedef struct ept_pde { /* 64-bit entry; bit numbers follow declaration order,
+                          * lowest bit first (GCC bit-field allocation on x86).
+                          * NOTE(review): by its name this is the EPT page-directory
+                          * entry form that points at a page table -- confirm field
+                          * meanings against the Intel SDM. */
+ uint64_t read : 1; /* bit 0 */
+ uint64_t write : 1; /* bit 1 */
+ uint64_t exec : 1; /* bit 2 */
+ uint64_t rsvd1 : 4; /* bits 6:3 */
+ uint64_t large_page : 1; /* bit 7 */
+ uint64_t ignore1 : 4; /* bits 11:8 */
+ uint64_t pt_base_addr : 39; /* bits 50:12 */
+ uint64_t rsvd2 : 1; /* bit 51 */
+ uint64_t ignore2 : 12; /* bits 63:52 */
+} __attribute__((packed)) ept_pde_t;
+
+
+
+typedef struct ept_pte { /* 64-bit entry; bit numbers follow declaration order,
+                          * lowest bit first (GCC bit-field allocation on x86).
+                          * NOTE(review): by its name this is the leaf EPT page-table
+                          * entry -- confirm field meanings against the Intel SDM. */
+ uint64_t read : 1; /* bit 0 */
+ uint64_t write : 1; /* bit 1 */
+ uint64_t exec : 1; /* bit 2 */
+ uint64_t mt : 3; /* bits 5:3 */
+ uint64_t ipat : 1; /* bit 6 */
+ uint64_t ignore1 : 5; /* bits 11:7 */
+ uint64_t page_base_addr : 39; /* bits 50:12 */
+ uint64_t rsvd2 : 1; /* bit 51 */
+ uint64_t ignore2 : 12; /* bits 63:52 */
+} __attribute__((packed)) ept_pte_t;
+
+int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info);
+int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual);
+
#endif
VMEXIT_IO_INSTR = 30,
VMEXIT_RDMSR = 31,
VMEXIT_WRMSR = 32,
- VMEXIT_ENTRY_FAIL_INVALID_GUEST_STATE = 33,
- VMEXIT_ENTRY_FAIL_MSR_LOAD = 34,
+ VMEXIT_INVALID_GUEST_STATE = 33,
+ VMEXIT_INVALID_MSR_LOAD = 34,
VMEXIT_MWAIT = 36,
VMEXIT_MONITOR = 39,
VMEXIT_PAUSE = 40,
- VMEXIT_ENTRY_FAILURE_MACHINE_CHECK = 41,
+ VMEXIT_INVALID_MACHINE_CHECK = 41,
VMEXIT_TPR_BELOW_THRESHOLD = 43,
VMEXIT_APIC = 44,
VMEXIT_GDTR_IDTR = 46,
} __attribute__((packed));
-struct VMExitTSQual {
+ struct VMExitTSQual {
uint32_t selector : 16; // selector of destination TSS
uint32_t rsvd : 14; // reserved to 0
uint32_t src : 2; // (0: CALL ; 1: IRET ; 2: JMP ; 3: Task gate in IDT)
+struct vmx_basic_exit_info { /* 32-bit word exposed both as a raw value and as a
+                              * 16-bit reason plus flag bits.  Bit numbers follow
+                              * declaration order, lowest bit first (GCC bit-field
+                              * allocation on x86).
+                              * NOTE(review): presumably overlays the VMCS exit-reason
+                              * field -- confirm against the Intel SDM. */
+ union {
+ uint32_t value; /* the whole word */
+ struct {
+ uint16_t reason; /* bits 15:0 */
+ uint16_t rsvd1 :12; /* bits 27:16 */
+ uint8_t mtf_pending : 1; /* bit 28 */
+ uint8_t vmx_root_op : 1; /* bit 29 */
+ uint8_t rsvd2 : 1; /* bit 30 */
+ uint8_t entry_error : 1; /* bit 31 */
+ } __attribute__((packed));
+ } __attribute__((packed));
+} __attribute__((packed));
+
struct vmx_exit_info {
uint32_t instr_len;
uint32_t int_err;
addr_t guest_linear_addr;
+
+ /* EPT INFO */
+ addr_t ept_fault_addr;
+
};
};
+
+
int v3_init_vmx_hw(struct vmx_hw_info * hw_info);
+uint32_t v3_vmx_get_ctrl_features(struct vmx_ctrl_field * fields);
vmx_hw_info.o \
vmcs.o \
vmx_ctrl_regs.o \
- vmx_assist.o
+ vmx_assist.o \
+ vmx_ept.o
}
#endif
#ifdef CONFIG_VMX
- if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+ if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU) || (cpu_type == V3_VMX_EPT_UG_CPU)) {
cpu_valid = 1;
v3_print_vmcs();
}
#ifdef CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
v3_init_vmx_io_map(vm);
v3_init_vmx_msr_map(vm);
break;
#ifdef CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
v3_deinit_vmx_io_map(vm);
v3_deinit_vmx_msr_map(vm);
break;
#ifdef CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) {
PrintError("Error in VMX initialization\n");
return -1;
#ifdef CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
if (v3_deinit_vmx_vmcs(core) == -1) {
PrintError("Error in VMX initialization\n");
return -1;
vmx_ret |= check_vmcs_write(VMCS_ENTRY_CTRLS, arch_data->entry_ctrls.value);
vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, arch_data->excp_bmap.value);
+ if (info->shdw_pg_mode == NESTED_PAGING) {
+ vmx_ret |= check_vmcs_write(VMCS_EPT_PTR, info->direct_map_pt);
+ }
+
return vmx_ret;
}
check_vmcs_read(VMCS_GUEST_DR7, &(info->dbg_regs.dr7));
check_vmcs_read(VMCS_GUEST_RFLAGS, &(info->ctrl_regs.rflags));
- if (((struct vmx_data *)info->vmm_data)->ia32e_avail) {
+
#ifdef __V3_64BIT__
- check_vmcs_read(VMCS_GUEST_EFER, &(info->ctrl_regs.efer));
-#else
- uint32_t hi, lo;
- check_vmcs_read(VMCS_GUEST_EFER, &hi);
- check_vmcs_read(VMCS_GUEST_EFER_HIGH, &lo);
- info->ctrl_regs.efer = ((uint64_t) hi << 32) | lo;
+ check_vmcs_read(VMCS_GUEST_EFER, &(info->ctrl_regs.efer));
#endif
- }
-
+
error = v3_read_vmcs_segments(&(info->segments));
return error;
check_vmcs_write(VMCS_GUEST_RFLAGS, info->ctrl_regs.rflags);
- if (((struct vmx_data *)info->vmm_data)->ia32e_avail) {
- check_vmcs_write(VMCS_GUEST_EFER, info->ctrl_regs.efer);
- }
+#ifdef __V3_64BIT__
+ check_vmcs_write(VMCS_GUEST_EFER, info->ctrl_regs.efer);
+#endif
+
+
+
error = v3_write_vmcs_segments(&(info->segments));
#define SYSENTER_CS_MSR 0x00000174
#define SYSENTER_ESP_MSR 0x00000175
#define SYSENTER_EIP_MSR 0x00000176
+#define EFER_MSR 0xc0000080
// SYSENTER CS MSR
v3_get_msr(SYSENTER_CS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
v3_get_msr(SYSENTER_EIP_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
vmx_ret |= check_vmcs_write(VMCS_HOST_SYSENTER_EIP, tmp_msr.value);
+ // EFER
+ v3_get_msr(EFER_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
+ vmx_ret |= check_vmcs_write(VMCS_HOST_EFER, tmp_msr.value);
+
return vmx_ret;
}
#ifdef CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
PrintDebug("Deinitializing VMX CPU %d\n", cpu_id);
v3_deinit_vmx_cpu(cpu_id);
break;
#if CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
return v3_start_vmx_guest(core);
break;
#endif
#if CONFIG_VMX
case V3_VMX_CPU:
case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
return v3_vmx_enter(info);
break;
#endif
if (pg_mode) {
if ((strcasecmp(pg_mode, "nested") == 0)) {
// we assume symmetric cores, so if core 0 has nested paging they all do
- if (v3_cpu_types[0] == V3_SVM_REV3_CPU) {
+ if ((v3_cpu_types[0] == V3_SVM_REV3_CPU) ||
+ (v3_cpu_types[0] == V3_VMX_EPT_CPU) ||
+ (v3_cpu_types[0] == V3_VMX_EPT_UG_CPU)) {
info->shdw_pg_mode = NESTED_PAGING;
} else {
PrintError("Nested paging not supported on this hardware. Defaulting to shadow paging\n");
static int unhandled_err(struct guest_info * core, addr_t guest_va, addr_t guest_pa,
struct v3_mem_region * reg, pf_error_t access_info) {
- PrintError("Unhandled memory access error\n");
+ PrintError("Unhandled memory access error (gpa=%p, gva=%p, error_code=%d)\n",
+ (void *)guest_pa, (void *)guest_va, *(uint32_t *)&access_info);
v3_print_mem_map(core->vm_info);
}
-void PrintHostPageTables(struct guest_info * info, addr_t cr3) {
+void PrintHostPageTables(struct guest_info * info, v3_cpu_mode_t cpu_mode, addr_t cr3) {
PrintDebug("CR3: %p\n", (void *)cr3);
- switch (info->cpu_mode) {
+ switch (cpu_mode) {
case PROTECTED:
v3_walk_host_pt_32(info, cr3, print_page_walk_cb, NULL);
break;
void PrintGuestPageTables(struct guest_info * info, addr_t cr3) {
PrintDebug("CR3: %p\n", (void *)cr3);
switch (info->cpu_mode) {
+ case REAL:
case PROTECTED:
v3_walk_guest_pt_32(info, cr3, print_page_walk_cb, NULL);
break;
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu>
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
* All rights reserved.
*
- * Author: Peter Dinda <pdinda@northwestern.edu>
- * Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
+#include <palacios/vmx_ept.h>
+#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>
#ifndef CONFIG_DEBUG_VMX
/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;
+extern v3_cpu_arch_t v3_cpu_types[];
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
-static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
+static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
int vmx_ret = 0;
// disable global interrupts for vm state initialization
PrintDebug("Loading VMCS\n");
vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
- active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+ active_vmcs_ptrs[V3_Get_CPU()] = vmx_state->vmcs_ptr_phys;
vmx_state->state = VMX_UNLAUNCHED;
if (vmx_ret != VMX_SUCCESS) {
vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
- vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;;
+ vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
+ vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;
/* Print Control MSRs */
PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
vmx_state->host_state.tr.base = tmp_seg.base;
-
-
- /********** Setup and VMX Control Fields from MSR ***********/
+ /********** Setup VMX Control Fields ***********/
/* Add external interrupts, NMI exiting, and virtual NMI */
vmx_state->pin_ctrls.nmi_exit = 1;
vmx_state->pin_ctrls.ext_int_exit = 1;
- vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
+
vmx_state->pri_proc_ctrls.hlt_exit = 1;
vmx_state->pri_proc_ctrls.invlpg_exit = 1;
- vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
+
vmx_state->pri_proc_ctrls.pause_exit = 1;
vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef CONFIG_TIME_VIRTUALIZE_TSC
#endif
/* Setup IO map */
- vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
+ vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
+ vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
- (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
+ (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
+
+
+ vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
+ vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));
- vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));
+#ifdef __V3_64BIT__
vmx_state->exit_ctrls.host_64_on = 1;
+#endif
- if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
- vmx_state->ia32e_avail = 1;
- }
- /******* Setup VMXAssist guest state ***********/
+ /* Not sure how exactly to handle this... */
+ v3_hook_msr(core->vm_info, EFER_MSR,
+ &v3_handle_efer_read,
+ &v3_handle_efer_write,
+ core);
+
+
+ vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
- info->rip = 0xd0000;
- info->vm_regs.rsp = 0x80000;
- info->ctrl_regs.rflags->rsvd1 = 1;
-#define GUEST_CR0 0x80000031
-#define GUEST_CR4 0x00002000
- info->ctrl_regs.cr0 = GUEST_CR0;
- info->ctrl_regs.cr4 = GUEST_CR4;
- ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
-
/* Setup paging */
- if (info->shdw_pg_mode == SHADOW_PAGING) {
+ if (core->shdw_pg_mode == SHADOW_PAGING) {
PrintDebug("Creating initial shadow page table\n");
- if (v3_init_passthrough_pts(info) == -1) {
+ if (v3_init_passthrough_pts(core) == -1) {
PrintError("Could not initialize passthrough page tables\n");
return -1;
}
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
+#define CR0_WP 0x00010000 // To ensure mem hooks work
+ vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));
-
- vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
- vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
-
- info->ctrl_regs.cr3 = info->direct_map_pt;
+ core->ctrl_regs.cr3 = core->direct_map_pt;
// vmx_state->pinbased_ctrls |= NMI_EXIT;
/* Add page fault exits */
vmx_state->excp_bmap.pf = 1;
- }
- // Setup segment registers
- {
- struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
+ // Setup VMX Assist
+ v3_vmxassist_init(core, vmx_state);
- int i;
+ } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
+ (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_CPU)) {
- for (i = 0; i < 10; i++) {
- seg_reg[i].selector = 3 << 3;
- seg_reg[i].limit = 0xffff;
- seg_reg[i].base = 0x0;
- }
+ // initialize 1to1 pts
- info->segments.cs.selector = 2<<3;
-
- /* Set only the segment registers */
- for (i = 0; i < 6; i++) {
- seg_reg[i].limit = 0xfffff;
- seg_reg[i].granularity = 1;
- seg_reg[i].type = 3;
- seg_reg[i].system = 1;
- seg_reg[i].dpl = 0;
- seg_reg[i].present = 1;
- seg_reg[i].db = 1;
- }
+#define CR0_PE 0x00000001
+#define CR0_PG 0x80000000
+#define CR0_WP 0x00010000 // To ensure mem hooks work
+ vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));
+
+ // vmx_state->pinbased_ctrls |= NMI_EXIT;
- info->segments.cs.type = 0xb;
+ /* Add CR exits */
+ //vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
+ //vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
- info->segments.ldtr.selector = 0x20;
- info->segments.ldtr.type = 2;
- info->segments.ldtr.system = 0;
- info->segments.ldtr.present = 1;
- info->segments.ldtr.granularity = 0;
+ /* Add page fault exits */
+ vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs
+
+ // Setup VMX Assist
+ v3_vmxassist_init(core, vmx_state);
-
- /************* Map in GDT and vmxassist *************/
-
- uint64_t gdt[] __attribute__ ((aligned(32))) = {
- 0x0000000000000000ULL, /* 0x00: reserved */
- 0x0000830000000000ULL, /* 0x08: 32-bit TSS */
- //0x0000890000000000ULL, /* 0x08: 32-bit TSS */
- 0x00CF9b000000FFFFULL, /* 0x10: CS 32-bit */
- 0x00CF93000000FFFFULL, /* 0x18: DS 32-bit */
- 0x000082000000FFFFULL, /* 0x20: LDTR 32-bit */
- };
-
-#define VMXASSIST_GDT 0x10000
- addr_t vmxassist_gdt = 0;
-
- if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
- PrintError("Could not find VMXASSIST GDT destination\n");
+ /* Enable EPT */
+ vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
+ vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
+ // vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation
+
+ vmx_state->entry_ctrls.ld_efer = 1;
+ vmx_state->exit_ctrls.ld_efer = 1;
+ vmx_state->exit_ctrls.save_efer = 1;
+
+ if (v3_init_ept(core, &hw_info) == -1) {
+ PrintError("Error initializing EPT\n");
return -1;
}
- memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
-
- info->segments.gdtr.base = VMXASSIST_GDT;
-
-#define VMXASSIST_TSS 0x40000
- uint64_t vmxassist_tss = VMXASSIST_TSS;
- gdt[0x08 / sizeof(gdt[0])] |=
- ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
- ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
- ((vmxassist_tss & 0x0000FFFF) << (16)) |
- (8392 - 1);
-
- info->segments.tr.selector = 0x08;
- info->segments.tr.base = vmxassist_tss;
-
- //info->segments.tr.type = 0x9;
- info->segments.tr.type = 0x3;
- info->segments.tr.system = 0;
- info->segments.tr.present = 1;
- info->segments.tr.granularity = 0;
- }
-
- // setup VMXASSIST
- {
-#define VMXASSIST_START 0x000d0000
- extern uint8_t v3_vmxassist_start[];
- extern uint8_t v3_vmxassist_end[];
- addr_t vmxassist_dst = 0;
-
- if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
- PrintError("Could not find VMXASSIST destination\n");
- return -1;
+
+ } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
+ (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_UG_CPU)) {
+ int i = 0;
+ // For now we will assume that unrestricted guest mode is assured w/ EPT
+
+ core->vm_regs.rsp = 0x00;
+ core->rip = 0xfff0;
+ core->vm_regs.rdx = 0x00000f00;
+ core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
+ core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
+
+
+ core->segments.cs.selector = 0xf000;
+ core->segments.cs.limit = 0xffff;
+ core->segments.cs.base = 0x0000000f0000LL;
+
+ // (raw attributes = 0xf3)
+ core->segments.cs.type = 0xb;
+ core->segments.cs.system = 0x1;
+ core->segments.cs.dpl = 0x0;
+ core->segments.cs.present = 1;
+
+
+
+ struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
+ &(core->segments.es), &(core->segments.fs),
+ &(core->segments.gs), NULL};
+
+ for ( i = 0; segregs[i] != NULL; i++) {
+ struct v3_segment * seg = segregs[i];
+
+ seg->selector = 0x0000;
+ // seg->base = seg->selector << 4;
+ seg->base = 0x00000000;
+ seg->limit = 0xffff;
+
+
+ seg->type = 0x3;
+ seg->system = 0x1;
+ seg->dpl = 0x0;
+ seg->present = 1;
+ // seg->granularity = 1;
+
}
- memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
+ core->segments.gdtr.limit = 0x0000ffff;
+ core->segments.gdtr.base = 0x0000000000000000LL;
+
+ core->segments.idtr.limit = 0x0000ffff;
+ core->segments.idtr.base = 0x0000000000000000LL;
+
+ core->segments.ldtr.selector = 0x0000;
+ core->segments.ldtr.limit = 0x0000ffff;
+ core->segments.ldtr.base = 0x0000000000000000LL;
+ core->segments.ldtr.type = 2;
+ core->segments.ldtr.present = 1;
+
+ core->segments.tr.selector = 0x0000;
+ core->segments.tr.limit = 0x0000ffff;
+ core->segments.tr.base = 0x0000000000000000LL;
+ core->segments.tr.type = 0xb;
+ core->segments.tr.present = 1;
+
+ // core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
+ core->dbg_regs.dr7 = 0x0000000000000400LL;
+
+ /* Enable EPT */
+ vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
+ vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
+ vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation
+
+ vmx_state->entry_ctrls.ld_efer = 1;
+ vmx_state->exit_ctrls.ld_efer = 1;
+ vmx_state->exit_ctrls.save_efer = 1;
+
+
+ if (v3_init_ept(core, &hw_info) == -1) {
+ PrintError("Error initializing EPT\n");
+ return -1;
+ }
+
+ } else {
+ PrintError("Invalid Virtual paging mode\n");
+ return -1;
+ }
- vmx_state->assist_state = VMXASSIST_DISABLED;
- }
+ // Hook the VMX msrs
+ // Setup SYSCALL/SYSENTER MSRs in load/store area
/* Sanity check ctrl/reg fields against hw_defaults */
+
/*** Write all the info to the VMCS ***/
+ /*
{
+ // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
struct v3_msr tmp_msr;
v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
- info->dbg_regs.dr7 = 0x400;
+ core->dbg_regs.dr7 = 0x400;
}
-
+ */
#ifdef __V3_64BIT__
vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
- if (v3_update_vmcs_ctrl_fields(info)) {
+ if (v3_update_vmcs_ctrl_fields(core)) {
PrintError("Could not write control fields!\n");
return -1;
}
- if (v3_update_vmcs_host_state(info)) {
+ if (v3_update_vmcs_host_state(core)) {
PrintError("Could not write host state\n");
return -1;
}
-
-
// reenable global interrupts for vm state initialization now
// that the vm state is initialized. If another VM kicks us off,
// it'll update our vmx state so that we know to reload ourself
return 0;
}
-int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
+int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
struct vmx_data * vmx_state = NULL;
int vmx_ret = 0;
PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
- info->vmm_data = vmx_state;
+ core->vmm_data = vmx_state;
vmx_state->state = VMX_UNLAUNCHED;
- PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
+ PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
// TODO: Fix vmcs fields so they're 32-bit
if (vm_class == V3_PC_VM) {
PrintDebug("Initializing VMCS\n");
- init_vmcs_bios(info, vmx_state);
+ init_vmcs_bios(core, vmx_state);
} else {
PrintError("Invalid VM Class\n");
return -1;
check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));
+ if (info->shdw_pg_mode == NESTED_PAGING) {
+ check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
+ }
+
//PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
exit_log[info->num_exits % 10] = exit_info;
void v3_init_vmx_cpu(int cpu_id) {
- extern v3_cpu_arch_t v3_cpu_types[];
if (cpu_id == 0) {
if (v3_init_vmx_hw(&hw_info) == -1) {
}
}
-
enable_vmx();
}
- v3_cpu_types[cpu_id] = V3_VMX_CPU;
-
-
+ {
+ struct vmx_sec_proc_ctrls sec_proc_ctrls;
+ sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
+
+ if (sec_proc_ctrls.enable_ept == 0) {
+ V3_Print("VMX EPT (Nested) Paging not supported\n");
+ v3_cpu_types[cpu_id] = V3_VMX_CPU;
+ } else if (sec_proc_ctrls.unrstrct_guest == 0) {
+ V3_Print("VMX EPT (Nested) Paging supported\n");
+ v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
+ } else {
+ V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
+ v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
+ }
+ }
}
#include <palacios/vmx_lowlevel.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmx.h>
+#include <palacios/vmm_ctrl_regs.h>
#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
+#define VMXASSIST_GDT 0x10000
+#define VMXASSIST_TSS 0x40000
+#define VMXASSIST_START 0xd0000
+#define VMXASSIST_1to1_PT 0xde000 // We'll shove this at the end, and pray to god VMXASSIST doesn't mess with it
+
+
+#define VMXASSIST_MAGIC 0x17101966
+
+
+/*
+ * Header of the VMXASSIST image. Code in this file locates it by translating
+ * guest physical address VMXASSIST_START.
+ * The structure is packed: 8 reserved bytes followed by three 32-bit words.
+ * NOTE(review): presumably `magic` is expected to equal VMXASSIST_MAGIC and the
+ * two *_gpa fields are guest physical addresses of vmx_assist_context
+ * structures -- confirm against the code that consumes the header.
+ */
+struct vmx_assist_header {
+ uint64_t rsvd; // 8 bytes of nothing
+ uint32_t magic;
+ uint32_t new_ctx_gpa;
+ uint32_t old_ctx_gpa;
+} __attribute__((packed));
+
+
+/*
+ * 32-bit segment access-rights word, accessible either as the raw value
+ * (`bytes`) or as individual bitfields (`fields`). The field widths below add
+ * up to exactly 32 bits.
+ * NOTE(review): the field order looks like the VMX segment access-rights
+ * format (type, S, DPL, P, AVL, D/B, G, unusable); the bit positions only line
+ * up if the compiler allocates bitfields LSB-first, as GCC does on x86 --
+ * confirm before building with another compiler.
+ */
+union vmcs_arbytes {
+ struct arbyte_fields {
+ unsigned int seg_type : 4,
+ s : 1,
+ dpl : 2,
+ p : 1,
+ reserved0 : 4,
+ avl : 1,
+ reserved1 : 1,
+ default_ops_size: 1,
+ g : 1,
+ null_bit : 1,
+ reserved2 : 15;
+ } __attribute__((packed)) fields;
+ unsigned int bytes; // the same 32 bits viewed as one word
+} __attribute__((packed));
+
+/*
+ * One segment register as stored inside struct vmx_assist_context:
+ * selector, limit, base and access rights, each held in 32 bits (packed).
+ */
+struct vmx_assist_segment {
+ uint32_t sel;
+ uint32_t limit;
+ uint32_t base;
+ union vmcs_arbytes arbytes;
+} __attribute__((packed));
+
+
+/*
+ * World switch state
+ *
+ * Packed 32-bit register image exchanged with VMXASSIST. It is the type taken
+ * by vmx_save_world_ctx() and vmx_restore_world_ctx() in this file.
+ * The layout is fixed: do not reorder or resize fields.
+ */
+struct vmx_assist_context {
+ uint32_t eip; /* execution pointer */
+ uint32_t esp; /* stack pointer */
+ uint32_t eflags; /* flags register */
+ uint32_t cr0;
+ uint32_t cr3; /* page table directory */
+ uint32_t cr4;
+
+ uint32_t idtr_limit; /* idt */
+ uint32_t idtr_base;
+
+ uint32_t gdtr_limit; /* gdt */
+ uint32_t gdtr_base;
+
+ struct vmx_assist_segment cs;
+ struct vmx_assist_segment ds;
+ struct vmx_assist_segment es;
+ struct vmx_assist_segment ss;
+ struct vmx_assist_segment fs;
+ struct vmx_assist_segment gs;
+ struct vmx_assist_segment tr;
+ struct vmx_assist_segment ldtr;
+
+
+ /* NOTE(review): presumably the real-mode IRQ base vectors -- confirm */
+ unsigned char rm_irqbase[2];
+} __attribute__((packed));
+
+
+
static void vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
static void vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
- if (v3_gpa_to_hva(info, VMXASSIST_BASE, (addr_t *)&hdr) == -1) {
+ if (v3_gpa_to_hva(info, VMXASSIST_START, (addr_t *)&hdr) == -1) {
PrintError("Could not translate address for vmxassist header\n");
return -1;
}
}
+/*
+ * Prepare a core so that it begins execution inside the VMXASSIST image.
+ *
+ * Steps performed:
+ *   - point RIP at VMXASSIST_START and set an initial stack pointer,
+ *   - load fixed CR0/CR4 values,
+ *   - fill in flat segment state and write a 5-entry GDT to VMXASSIST_GDT,
+ *   - with nested paging, build an identity-mapped page directory of 4MB
+ *     pages at VMXASSIST_1to1_PT and point CR3 at it,
+ *   - copy the VMXASSIST binary (v3_vmxassist_start .. v3_vmxassist_end) to
+ *     VMXASSIST_START and mark the assist state VMXASSIST_DISABLED.
+ *
+ * core      - core whose register/segment state is initialized
+ * vmx_state - VMX state of that core; only assist_state is written
+ *
+ * Returns 0 on success, -1 if a guest physical address cannot be translated.
+ */
+int v3_vmxassist_init(struct guest_info * core, struct vmx_data * vmx_state) {
+
+    core->rip = VMXASSIST_START;
+    core->vm_regs.rsp = 0x80000;
+    ((struct rflags *)&(core->ctrl_regs.rflags))->rsvd1 = 1;
+
+    /* CR0: PG | WP | NE | ET | PE,  CR4: VMXE | PSE */
+#define GUEST_CR0 0x80010031
+#define GUEST_CR4 0x00002010
+    core->ctrl_regs.cr0 = GUEST_CR0;
+    core->ctrl_regs.cr4 = GUEST_CR4;
+
+    ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->pe = 1;
+    ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->wp = 1;
+
+
+    // Setup segment registers
+    {
+        /*
+         * core->segments is walked as a flat array of struct v3_segment.
+         * NOTE(review): this assumes the structure holds 10 consecutive
+         * v3_segment members with the six ordinary segment registers
+         * first -- confirm against the definition of the segments struct.
+         */
+        struct v3_segment * seg_reg = (struct v3_segment *)&(core->segments);
+
+        int i;
+
+        for (i = 0; i < 10; i++) {
+            seg_reg[i].selector = 3 << 3;
+            seg_reg[i].limit = 0xffff;
+            seg_reg[i].base = 0x0;
+        }
+
+        core->segments.cs.selector = 2 << 3;
+
+        /* Set only the segment registers */
+        for (i = 0; i < 6; i++) {
+            seg_reg[i].limit = 0xfffff;
+            seg_reg[i].granularity = 1;
+            seg_reg[i].type = 3;
+            seg_reg[i].system = 1;
+            seg_reg[i].dpl = 0;
+            seg_reg[i].present = 1;
+            seg_reg[i].db = 1;
+        }
+
+        core->segments.cs.type = 0xb;
+
+        core->segments.ldtr.selector = 0x20;
+        core->segments.ldtr.type = 2;
+        core->segments.ldtr.system = 0;
+        core->segments.ldtr.present = 1;
+        core->segments.ldtr.granularity = 0;
+
+
+        /************* Map in GDT and vmxassist *************/
+
+        uint64_t gdt[] __attribute__ ((aligned(32))) = {
+            0x0000000000000000ULL, /* 0x00: reserved */
+            0x0000830000000000ULL, /* 0x08: 32-bit TSS */
+            //0x0000890000000000ULL, /* 0x08: 32-bit TSS */
+            0x00CF9b000000FFFFULL, /* 0x10: CS 32-bit */
+            0x00CF93000000FFFFULL, /* 0x18: DS 32-bit */
+            0x000082000000FFFFULL, /* 0x20: LDTR 32-bit */
+        };
+
+
+        addr_t vmxassist_gdt = 0;
+        uint64_t vmxassist_tss = VMXASSIST_TSS;
+
+        /*
+         * Fill in the base and limit of the TSS descriptor (selector 0x08)
+         * BEFORE the table is copied into guest memory. Previously the copy
+         * was made first, so the guest-visible GDT kept a TSS descriptor
+         * with base 0 / limit 0 and this computation had no effect.
+         */
+        gdt[0x08 / sizeof(gdt[0])] |=
+            ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
+            ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
+            ((vmxassist_tss & 0x0000FFFF) << (16)) |
+            (8392 - 1);
+
+        if (v3_gpa_to_hva(core, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
+            PrintError("Could not find VMXASSIST GDT destination\n");
+            return -1;
+        }
+
+        memcpy((void *)vmxassist_gdt, gdt, sizeof(gdt));
+
+        core->segments.gdtr.base = VMXASSIST_GDT;
+
+        core->segments.tr.selector = 0x08;
+        core->segments.tr.base = vmxassist_tss;
+
+        //core->segments.tr.type = 0x9;
+        core->segments.tr.type = 0x3;
+        core->segments.tr.system = 0;
+        core->segments.tr.present = 1;
+        core->segments.tr.granularity = 0;
+    }
+
+    if (core->shdw_pg_mode == NESTED_PAGING) {
+        // setup 1to1 page table internally.
+        int i = 0;
+        pde32_4MB_t * pde = NULL;
+
+        // Informational only, so it is not reported as an error
+        PrintDebug("Setting up internal VMXASSIST page tables\n");
+
+        if (v3_gpa_to_hva(core, VMXASSIST_1to1_PT, (addr_t *)(&pde)) == -1) {
+            PrintError("Could not find VMXASSIST 1to1 PT destination\n");
+            return -1;
+        }
+
+        memset(pde, 0, PAGE_SIZE);
+
+        // 1024 entries of 4MB each: identity map of the low 4GB
+        for (i = 0; i < 1024; i++) {
+            pde[i].present = 1;
+            pde[i].writable = 1;
+            pde[i].user_page = 1;
+            pde[i].large_page = 1;
+            // Multiply in addr_t so i >= 512 cannot overflow a signed int
+            pde[i].page_base_addr = PAGE_BASE_ADDR_4MB((addr_t)i * PAGE_SIZE_4MB);
+
+            // PrintError("PDE %d: %x\n", i, *(uint32_t *)&(pde[i]));
+        }
+
+        core->ctrl_regs.cr3 = VMXASSIST_1to1_PT;
+
+    }
+
+    // setup VMXASSIST
+    {
+
+        extern uint8_t v3_vmxassist_start[];
+        extern uint8_t v3_vmxassist_end[];
+        addr_t vmxassist_dst = 0;
+
+        if (v3_gpa_to_hva(core, VMXASSIST_START, &vmxassist_dst) == -1) {
+            PrintError("Could not find VMXASSIST destination\n");
+            return -1;
+        }
+
+        memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
+
+
+        vmx_state->assist_state = VMXASSIST_DISABLED;
+    }
+
+
+    return 0;
+}
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
+#include <palacios/vmm.h>
+#include <palacios/vmx_ept.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vm_guest_mem.h>
+
+static struct vmx_ept_msr * ept_info = NULL;
+
+
+/*
+ * Allocate one page and zero it, for use as an EPT paging structure.
+ *
+ * Returns the host virtual address of the page, or 0 if the page could not
+ * be allocated.
+ */
+static addr_t create_ept_page(void) {
+    void * page_pa = V3_AllocPages(1);
+    void * page = NULL;
+
+    // Do not translate or clear a page that was never allocated
+    if (page_pa == NULL) {
+        PrintError("Could not allocate EPT page\n");
+        return 0;
+    }
+
+    page = V3_VAddr(page_pa);
+    memset(page, 0, PAGE_SIZE);
+
+    return (addr_t)page;
+}
+
+
+
+
+/*
+ * Initialize the EPT pointer stored in core->direct_map_pt.
+ *
+ * core    - core whose direct_map_pt is filled in as a vmx_eptp_t
+ * hw_info - VMX hardware description; its ept_info is remembered in the
+ *           file-level ept_info pointer
+ *
+ * The hardware capability is checked before the root table is allocated so
+ * that the error path does not leak a page.
+ *
+ * Returns 0 on success, -1 if 4-level page walks are not supported or the
+ * root table could not be allocated.
+ */
+int v3_init_ept(struct guest_info * core, struct vmx_hw_info * hw_info) {
+    vmx_eptp_t * ept_ptr = (vmx_eptp_t *)&(core->direct_map_pt);
+    addr_t ept_va = 0;
+    addr_t ept_pa = 0;
+
+    ept_info = &(hw_info->ept_info);
+
+    if (!ept_info->pg_walk_len4) {
+        PrintError("Unsupported EPT Table depth\n");
+        return -1;
+    }
+
+    ept_va = create_ept_page();
+
+    if (ept_va == 0) {
+        PrintError("Could not allocate EPT root table\n");
+        return -1;
+    }
+
+    ept_pa = (addr_t)V3_PAddr((void *)ept_va);
+
+    /* TODO: Should we set this to WB?? */
+    ept_ptr->psmt = 0;
+
+    // Walk length is stored as (number of levels - 1)
+    ept_ptr->pwl1 = 3;
+
+    ept_ptr->pml_base_addr = PAGE_BASE_ADDR(ept_pa);
+
+
+    return 0;
+}
+
+
+/* We can use the default paging macros, since the formats are close enough to allow it */
+
+/*
+ * Handle an EPT violation by lazily populating the core's EPT hierarchy.
+ *
+ * core       - faulting core; the EPT root is taken from core->direct_map_pt
+ * fault_addr - guest physical address that faulted
+ * ept_qual   - exit qualification; only `present` and `write` are read, to
+ *              build the pf_error_t passed to the region's unhandled() hook
+ *
+ * Missing intermediate tables (PDP, PD, PT) are allocated on demand and linked
+ * in with read/write/exec all enabled. At the leaf (a 2MiB PDE when
+ * v3_get_max_page_size() reports PAGE_SIZE_2MB, otherwise a 4KiB PTE):
+ *   - if the entry is not yet readable and the region is allocated and
+ *     readable, the page is mapped read+exec, with write following the
+ *     region's write flag;
+ *   - in every other case the fault is forwarded to region->unhandled().
+ *
+ * Returns 0 when a mapping was installed, -1 on lookup, translation or
+ * allocation failure, or the value returned by region->unhandled().
+ */
+int v3_handle_ept_fault(struct guest_info * core, addr_t fault_addr, struct ept_exit_qual * ept_qual) {
+    ept_pml4_t * pml = NULL;
+    // ept_pdp_1GB_t * pdpe1gb = NULL;
+    ept_pdp_t * pdpe = NULL;
+    ept_pde_2MB_t * pde2mb = NULL;
+    ept_pde_t * pde = NULL;
+    ept_pte_t * pte = NULL;
+    addr_t host_addr = 0;
+
+    int pml_index = PML4E64_INDEX(fault_addr);
+    int pdpe_index = PDPE64_INDEX(fault_addr);
+    int pde_index = PDE64_INDEX(fault_addr);
+    int pte_index = PTE64_INDEX(fault_addr);
+
+    struct v3_mem_region * region = v3_get_mem_region(core->vm_info, core->cpu_id, fault_addr);
+    int page_size = PAGE_SIZE_4KB;
+
+
+
+    pf_error_t error_code = {0};
+    error_code.present = ept_qual->present;
+    error_code.write = ept_qual->write;
+
+    if (region == NULL) {
+        PrintError("invalid region, addr=%p\n", (void *)fault_addr);
+        return -1;
+    }
+
+    if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
+        page_size = v3_get_max_page_size(core, fault_addr, LONG);
+    }
+
+
+
+    pml = (ept_pml4_t *)CR3_TO_PML4E64_VA(core->direct_map_pt);
+
+
+
+    //Fix up the PML entry
+    if (pml[pml_index].read == 0) {
+        pdpe = (ept_pdp_t *)create_ept_page();
+
+        // Never link an unallocated table into the hierarchy
+        if (pdpe == NULL) {
+            PrintError("Could not allocate EPT PDP table\n");
+            return -1;
+        }
+
+        // Set default PML Flags...
+        pml[pml_index].read = 1;
+        pml[pml_index].write = 1;
+        pml[pml_index].exec = 1;
+
+        pml[pml_index].pdp_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pdpe));
+    } else {
+        pdpe = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pml[pml_index].pdp_base_addr));
+    }
+
+
+    // Fix up the PDPE entry
+    if (pdpe[pdpe_index].read == 0) {
+        pde = (ept_pde_t *)create_ept_page();
+
+        if (pde == NULL) {
+            PrintError("Could not allocate EPT page directory\n");
+            return -1;
+        }
+
+        // Set default PDPE Flags...
+        pdpe[pdpe_index].read = 1;
+        pdpe[pdpe_index].write = 1;
+        pdpe[pdpe_index].exec = 1;
+
+        pdpe[pdpe_index].pd_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pde));
+    } else {
+        pde = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pdpe[pdpe_index].pd_base_addr));
+    }
+
+
+
+    // Fix up the 2MiB PDE and exit here
+    if (page_size == PAGE_SIZE_2MB) {
+        pde2mb = (ept_pde_2MB_t *)pde; // all but these two lines are the same for PTE
+
+        // NOTE(review): large_page is set before the entry is inspected. If
+        // this PDE already pointed at a 4KiB page table it would now be
+        // reinterpreted as a 2MiB mapping -- confirm that
+        // v3_get_max_page_size() is stable for a given 2MiB region.
+        pde2mb[pde_index].large_page = 1;
+
+        if (pde2mb[pde_index].read == 0) {
+
+            if ( (region->flags.alloced == 1) &&
+                 (region->flags.read == 1)) {
+                // Full access
+                pde2mb[pde_index].read = 1;
+                pde2mb[pde_index].exec = 1;
+
+                if (region->flags.write == 1) {
+                    pde2mb[pde_index].write = 1;
+                } else {
+                    pde2mb[pde_index].write = 0;
+                }
+
+                if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+                    PrintError("Error: Could not translate fault addr (%p)\n", (void *)fault_addr);
+                    return -1;
+                }
+
+                pde2mb[pde_index].page_base_addr = PAGE_BASE_ADDR_2MB(host_addr);
+            } else {
+                return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+            }
+        } else {
+            // We fix all permissions on the first pass,
+            // so we only get here if its an unhandled exception
+
+            return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+        }
+
+        return 0;
+    }
+
+    // Continue with the 4KiB page hierarchy
+
+
+    // Fix up the PDE entry
+    if (pde[pde_index].read == 0) {
+        pte = (ept_pte_t *)create_ept_page();
+
+        if (pte == NULL) {
+            PrintError("Could not allocate EPT page table\n");
+            return -1;
+        }
+
+        pde[pde_index].read = 1;
+        pde[pde_index].write = 1;
+        pde[pde_index].exec = 1;
+
+        pde[pde_index].pt_base_addr = PAGE_BASE_ADDR_4KB((addr_t)V3_PAddr(pte));
+    } else {
+        pte = V3_VAddr((void *)BASE_TO_PAGE_ADDR_4KB(pde[pde_index].pt_base_addr));
+    }
+
+
+
+
+    // Fix up the PTE entry
+    if (pte[pte_index].read == 0) {
+
+        if ( (region->flags.alloced == 1) &&
+             (region->flags.read == 1)) {
+            // Full access
+            pte[pte_index].read = 1;
+            pte[pte_index].exec = 1;
+
+            if (region->flags.write == 1) {
+                pte[pte_index].write = 1;
+            } else {
+                pte[pte_index].write = 0;
+            }
+
+            if (v3_gpa_to_hpa(core, fault_addr, &host_addr) == -1) {
+                PrintError("Error Could not translate fault addr (%p)\n", (void *)fault_addr);
+                return -1;
+            }
+
+
+            pte[pte_index].page_base_addr = PAGE_BASE_ADDR_4KB(host_addr);
+        } else {
+            return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+        }
+    } else {
+        // We fix all permissions on the first pass,
+        // so we only get here if its an unhandled exception
+
+        return region->unhandled(core, fault_addr, fault_addr, region, error_code);
+    }
+
+
+    return 0;
+}
#include <palacios/vmx_ctrl_regs.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmm_halt.h>
+#include <palacios/vmx_ept.h>
#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
/* At this point the GPRs are already copied into the guest_info state */
int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
+ struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info->exit_reason);
+
/*
PrintError("Handling VMEXIT: %s (%u), %lu (0x%lx)\n",
v3_vmx_exit_code_to_str(exit_info->exit_reason),
v3_print_vmcs();
*/
+
+ if (basic_info->entry_error == 1) {
+ switch (basic_info->reason) {
+ case VMEXIT_INVALID_GUEST_STATE:
+ PrintError("VM Entry failed due to invalid guest state\n");
+ PrintError("Printing VMCS: (NOTE: This VMCS may not belong to the correct guest)\n");
+ v3_print_vmcs();
+ break;
+ case VMEXIT_INVALID_MSR_LOAD:
+ PrintError("VM Entry failed due to error loading MSRs\n");
+ break;
+ default:
+ PrintError("Entry failed for unknown reason (%d)\n", basic_info->reason);
+ break;
+ }
+
+ return -1;
+ }
+
+
#ifdef CONFIG_TELEMETRY
if (info->vm_info->enable_telemetry) {
v3_telemetry_start_exit(info);
}
#endif
- switch (exit_info->exit_reason) {
+ switch (basic_info->reason) {
case VMEXIT_INFO_EXCEPTION_OR_NMI: {
pf_error_t error_code = *(pf_error_t *)&(exit_info->int_err);
// JRL: Change "0x0e" to a macro value
- if ((uint8_t)exit_info->int_info == 0x0e) {
+ if ((uint8_t)exit_info->int_info == 14) {
#ifdef CONFIG_DEBUG_SHADOW_PAGING
PrintDebug("Page Fault at %p error_code=%x\n", (void *)exit_info->exit_qual, *(uint32_t *)&error_code);
#endif
PrintError("Error handling shadow page fault\n");
return -1;
}
+
} else {
PrintError("Page fault in unimplemented paging mode\n");
return -1;
break;
}
+ case VMEXIT_EPT_VIOLATION: {
+ struct ept_exit_qual * ept_qual = (struct ept_exit_qual *)&(exit_info->exit_qual);
+
+ if (v3_handle_ept_fault(info, exit_info->ept_fault_addr, ept_qual) == -1) {
+ PrintError("Error handling EPT fault\n");
+ return -1;
+ }
+
+ break;
+ }
case VMEXIT_INVLPG:
if (info->shdw_pg_mode == SHADOW_PAGING) {
if (v3_handle_shadow_invlpg(info) == -1) {
}
break;
+
+
+
case VMEXIT_PAUSE:
// Handled as NOP
info->rip += 2;
// This is handled in the atomic part of the vmx code,
// not in the generic (interruptable) vmx handler
break;
+
+
default:
PrintError("Unhandled VMEXIT: %s (%u), %lu (0x%lx)\n",
- v3_vmx_exit_code_to_str(exit_info->exit_reason),
- exit_info->exit_reason,
+ v3_vmx_exit_code_to_str(basic_info->reason),
+ basic_info->reason,
exit_info->exit_qual, exit_info->exit_qual);
return -1;
}
static const char VMEXIT_IO_INSTR_STR[] = "VMEXIT_IO_INSTR";
static const char VMEXIT_RDMSR_STR[] = "VMEXIT_RDMSR";
static const char VMEXIT_WRMSR_STR[] = "VMEXIT_WRMSR";
-static const char VMEXIT_ENTRY_FAIL_INVALID_GUEST_STATE_STR[] = "VMEXIT_ENTRY_FAIL_INVALID_GUEST_STATE";
-static const char VMEXIT_ENTRY_FAIL_MSR_LOAD_STR[] = "VMEXIT_ENTRY_FAIL_MSR_LOAD";
+static const char VMEXIT_INVALID_GUEST_STATE_STR[] = "VMEXIT_INVALID_GUEST_STATE";
+static const char VMEXIT_INVALID_MSR_LOAD_STR[] = "VMEXIT_INVALID_MSR_LOAD";
static const char VMEXIT_MWAIT_STR[] = "VMEXIT_MWAIT";
static const char VMEXIT_MONITOR_STR[] = "VMEXIT_MONITOR";
static const char VMEXIT_PAUSE_STR[] = "VMEXIT_PAUSE";
-static const char VMEXIT_ENTRY_FAILURE_MACHINE_CHECK_STR[] = "VMEXIT_ENTRY_FAILURE_MACHINE_CHECK";
+static const char VMEXIT_INVALID_MACHINE_CHECK_STR[] = "VMEXIT_INVALID_MACHINE_CHECK";
static const char VMEXIT_TPR_BELOW_THRESHOLD_STR[] = "VMEXIT_TPR_BELOW_THRESHOLD";
static const char VMEXIT_APIC_STR[] = "VMEXIT_APIC";
static const char VMEXIT_GDTR_IDTR_STR[] = "VMEXIT_GDTR_IDTR";
return VMEXIT_RDMSR_STR;
case VMEXIT_WRMSR:
return VMEXIT_WRMSR_STR;
- case VMEXIT_ENTRY_FAIL_INVALID_GUEST_STATE:
- return VMEXIT_ENTRY_FAIL_INVALID_GUEST_STATE_STR;
- case VMEXIT_ENTRY_FAIL_MSR_LOAD:
- return VMEXIT_ENTRY_FAIL_MSR_LOAD_STR;
+ case VMEXIT_INVALID_GUEST_STATE:
+ return VMEXIT_INVALID_GUEST_STATE_STR;
+ case VMEXIT_INVALID_MSR_LOAD:
+ return VMEXIT_INVALID_MSR_LOAD_STR;
case VMEXIT_MWAIT:
return VMEXIT_MWAIT_STR;
case VMEXIT_MONITOR:
return VMEXIT_MONITOR_STR;
case VMEXIT_PAUSE:
return VMEXIT_PAUSE_STR;
- case VMEXIT_ENTRY_FAILURE_MACHINE_CHECK:
- return VMEXIT_ENTRY_FAILURE_MACHINE_CHECK_STR;
+ case VMEXIT_INVALID_MACHINE_CHECK:
+ return VMEXIT_INVALID_MACHINE_CHECK_STR;
case VMEXIT_TPR_BELOW_THRESHOLD:
return VMEXIT_TPR_BELOW_THRESHOLD_STR;
case VMEXIT_APIC:
// Intel VMX Feature MSRs
+/*
+ * Return the bitmap of control features that may be enabled for a VMX
+ * control field.
+ *
+ * A bit is reported as available when it is set in fields->req_val (the
+ * feature is hardwired to 1) or clear in fields->req_mask (the bit is not
+ * constrained and can be changed).
+ */
+uint32_t v3_vmx_get_ctrl_features(struct vmx_ctrl_field * fields) {
+    return fields->req_val | ~(fields->req_mask);
+}
+
static int get_ex_ctrl_caps(struct vmx_hw_info * hw_info, struct vmx_ctrl_field * field,
- uint32_t old_msr, uint32_t true_msr) {
+ uint32_t old_msr, uint32_t true_msr) {
uint32_t old_0; /* Bit is 1 => MB1 */
uint32_t old_1; /* Bit is 0 => MBZ */
uint32_t true_0; /* Bit is 1 => MB1 */