#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_direct_paging.h>
+#include <palacios/svm.h>
-#ifndef DEBUG_CTRL_REGS
+#ifndef V3_CONFIG_DEBUG_CTRL_REGS
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
struct x86_instr dec_instr;
if (info->mem_mode == PHYSICAL_MEM) {
- ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
} else {
- ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
}
if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
*guest_cr0 = *new_cr0;
// This value must always be set to 1
- guest_cr0->et = 1;
+ guest_cr0->et = 1;
// Set the shadow register to catch non-virtualized flags
*shadow_cr0 = *guest_cr0;
// Paging is always enabled
- shadow_cr0->pg = 1;
+ shadow_cr0->pg = 1;
+
+ if (guest_cr0->pg == 0) {
+ // If paging is not enabled by the guest, then we always enable write-protect to catch memory hooks
+ shadow_cr0->wp = 1;
+ }
// Was there a paging transition
// Meaning we need to change the page tables
if (paging_transition) {
if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
- struct efer_64 * guest_efer = (struct efer_64 *)&(info->guest_efer);
+ struct efer_64 * guest_efer = (struct efer_64 *)&(info->shdw_pg_state.guest_efer);
struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
// Check long mode LME to set LME
return -1;
}
} else {
+
+ shadow_cr0->wp = 1;
if (v3_activate_passthrough_pt(info) == -1) {
PrintError("Failed to activate passthrough page tables\n");
struct x86_instr dec_instr;
if (info->mem_mode == PHYSICAL_MEM) {
- ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
} else {
- ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
}
struct x86_instr dec_instr;
if (info->mem_mode == PHYSICAL_MEM) {
- ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
} else {
- ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
}
if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
*guest_cr3 = *new_cr3;
}
-
+
+
// If Paging is enabled in the guest then we need to change the shadow page tables
if (info->mem_mode == VIRTUAL_MEM) {
if (v3_activate_shadow_pt(info) == -1) {
PrintError("Failed to activate 32 bit shadow page table\n");
return -1;
}
- }
+ }
PrintDebug("New Shadow CR3=%p; New Guest CR3=%p\n",
(void *)(addr_t)(info->ctrl_regs.cr3),
struct x86_instr dec_instr;
if (info->mem_mode == PHYSICAL_MEM) {
- ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
} else {
- ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
}
if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
// We don't need to virtualize CR4, all we need is to detect the activation of PAE
int v3_handle_cr4_read(struct guest_info * info) {
- // PrintError("CR4 Read not handled\n");
+ PrintError("CR4 Read not handled\n");
// Do nothing...
return 0;
}
v3_cpu_mode_t cpu_mode = v3_get_vm_cpu_mode(info);
if (info->mem_mode == PHYSICAL_MEM) {
- ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
} else {
- ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
+ ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
}
if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
// Check to see if we need to flush the tlb
+
if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);
(cr4->pge != new_cr4->pge) ||
(cr4->pae != new_cr4->pae)) {
PrintDebug("Handling PSE/PGE/PAE -> TLBFlush case, flag set\n");
- flush_tlb=1;
+ flush_tlb = 1;
}
}
return -1;
}
-
- if (flush_tlb) {
- PrintDebug("Handling PSE/PGE/PAE -> TLBFlush (doing flush now!)\n");
- if (v3_activate_shadow_pt(info) == -1) {
- PrintError("Failed to activate shadow page tables when emulating TLB flush in handling cr4 write\n");
- return -1;
+ if (info->shdw_pg_mode == SHADOW_PAGING) {
+ if (flush_tlb) {
+ PrintDebug("Handling PSE/PGE/PAE -> TLBFlush (doing flush now!)\n");
+ if (v3_activate_shadow_pt(info) == -1) {
+ PrintError("Failed to activate shadow page tables when emulating TLB flush in handling cr4 write\n");
+ return -1;
+ }
}
}
-
info->rip += dec_instr.instr_length;
return 0;
}
-int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) {
- struct guest_info * info = (struct guest_info *)(priv_data);
- PrintDebug("EFER Read HI=%x LO=%x\n", info->guest_efer.hi, info->guest_efer.lo);
+/*
+ The CR8 and APIC TPR interaction are kind of crazy.
+
+ CR8 mandates that the priority class is in bits 3:0
+
+ The interaction of CR8 and an actual APIC is somewhat implementation dependent, but
+ a basic current APIC has the priority class at 7:4 and the *subclass* at 3:0
+
+ The APIC TPR (both fields) can be written as the APIC register
+ A write to CR8 sets the priority class field, and should zero the subclass
+ A read from CR8 gets just the priority class field
+
+ In the apic_tpr storage location, we have:
+
+ zeros [class] [subclass]
+
+ Because of this, an APIC implementation should use apic_tpr to store its TPR
+ In fact, it *should* do this, otherwise its TPR may get out of sync with the architected TPR
+
+ On a CR8 read, we return just
+
+ zeros 0000 [class]
+
+ On a CR8 write, we set the register to
+
+ zeros [class] 0000
+
+*/
+
// Emulate a guest write to CR8 (MOV CR8, reg).
//
// Per the CR8/TPR description in the comment block preceding this function:
// CR8 carries the priority *class* in bits 3:0, while the architected APIC
// TPR keeps the class in bits 7:4 and the subclass in bits 3:0.  A CR8 write
// therefore installs (class << 4) into apic_tpr, zeroing the subclass.
//
// Returns 0 on success, -1 if the faulting instruction cannot be decoded or
// is not a MOV-to-CR.
int v3_handle_cr8_write(struct guest_info * info) {
    int ret;
    uchar_t instr[15];           // max x86 instruction length is 15 bytes
    struct x86_instr dec_instr;

    // Fetch the instruction bytes at the guest RIP, using the physical- or
    // virtual-address read path depending on whether guest paging is active.
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    // NOTE(review): ret is never checked; a short read (e.g. near a page
    // boundary) is assumed to still cover the whole instruction -- confirm.

    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }

    if (dec_instr.op_type == V3_OP_MOV2CR) {
        PrintDebug("MOV2CR8 (cpu_mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));

        // CR8 is only architecturally accessible in long mode.
        if ((info->cpu_mode == LONG) ||
            (info->cpu_mode == LONG_32_COMPAT)) {
            uint64_t *val = (uint64_t *)(dec_instr.src_operand.operand);

            // Priority class (bits 3:0 of the source) moves into apic_tpr
            // bits 7:4; the subclass nibble is cleared, as mandated.
            info->ctrl_regs.apic_tpr = (*val & 0xf) << 4;

            V3_Print("Write of CR8 sets apic_tpr to 0x%llx\n",info->ctrl_regs.apic_tpr);

        } else {
            // probably should raise exception here
        }
    } else {
        PrintError("Unhandled opcode in handle_cr8_write\n");
        return -1;
    }

    // Advance past the emulated instruction.
    info->rip += dec_instr.instr_length;
    return 0;
}
-// TODO: this is a disaster we need to clean this up...
-int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) {
- struct guest_info * info = (struct guest_info *)(priv_data);
- //struct efer_64 * new_efer = (struct efer_64 *)&(src.value);
- struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
- struct v3_msr * guest_efer = &(info->guest_efer);
// Emulate a guest read of CR8 (MOV reg, CR8).
//
// Inverse of the write path: the stored apic_tpr keeps the priority class in
// bits 7:4, so a CR8 read returns just that class, shifted down into bits
// 3:0 of the destination register (the subclass is not exposed via CR8).
//
// Returns 0 on success, -1 if the faulting instruction cannot be decoded or
// is not a MOV-from-CR.
int v3_handle_cr8_read(struct guest_info * info) {
    uchar_t instr[15];           // max x86 instruction length is 15 bytes
    int ret;
    struct x86_instr dec_instr;

    // Fetch the instruction bytes at the guest RIP, via the physical- or
    // virtual-address path depending on the guest memory mode.
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    // NOTE(review): ret is never checked here either -- confirm a short read
    // cannot truncate the instruction.

    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (dec_instr.op_type == V3_OP_MOVCR2) {
        PrintDebug("MOVCR82 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));

        // CR8 is only architecturally accessible in long mode.
        if ((info->cpu_mode == LONG) ||
            (info->cpu_mode == LONG_32_COMPAT)) {
            uint64_t *dst_reg = (uint64_t *)(dec_instr.dst_operand.operand);

            // Extract the priority class from apic_tpr bits 7:4 and return
            // it in bits 3:0, as CR8 architecture requires.
            *dst_reg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;

            V3_Print("Read of CR8 (apic_tpr) returns 0x%llx\n",*dst_reg);

        } else {
            // probably should raise exception
        }

    } else {
        PrintError("Unhandled opcode in handle_cr8_read\n");
        return -1;
    }

    // Advance past the emulated instruction.
    info->rip += dec_instr.instr_length;
    return 0;
}
+
+
// MSR read handler for EFER.
// Returns the *virtualized* EFER value the guest last wrote (kept in
// shdw_pg_state.guest_efer), not the hardware EFER in ctrl_regs, so the
// guest never sees host-managed bits.
int v3_handle_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
    PrintDebug("EFER Read HI=%x LO=%x\n", core->shdw_pg_state.guest_efer.hi, core->shdw_pg_state.guest_efer.lo);

    dst->value = core->shdw_pg_state.guest_efer.value;

    return 0;
}
+
+
// MSR write handler for EFER.
//
// Maintains two copies of EFER: the guest-visible value (shdw_pg_state.
// guest_efer, returned by v3_handle_efer_read) and the value loaded into
// hardware while the guest runs (ctrl_regs.efer).  Under shadow paging the
// LME/LMA bits of the hardware copy must be adjusted, since the shadow
// paging mode need not match what the guest has programmed.
//
// Returns 0 on success, -1 for the unsupported long-mode-disable transition.
int v3_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * vm_efer = &(core->shdw_pg_state.guest_efer);
    struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
    // Snapshot the pre-write hardware EFER so transitions can be detected.
    struct efer_64 old_hw_efer = *((struct efer_64 *)&core->ctrl_regs.efer);

    PrintDebug("EFER Write HI=%x LO=%x\n", src.hi, src.lo);

    // Set EFER value seen by guest if it reads EFER
    vm_efer->value = src.value;

    // Set EFER value seen by hardware while the guest is running
    *(uint64_t *)hw_efer = src.value;

    // We have gotten here either because we are using
    // shadow paging, or we are using nested paging on SVM
    // In the latter case, we don't need to do anything
    // like the following
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        // Catch unsupported features
        if ((old_hw_efer.lme == 1) && (hw_efer->lme == 0)) {
            PrintError("Disabling long mode once it has been enabled is not supported\n");
            return -1;
        }

        // Set LME and LMA bits seen by hardware
        if (old_hw_efer.lme == 0) {
            // Long mode was not previously enabled, so the lme bit cannot
            // be set yet. It will be set later when the guest sets CR0.PG
            // to enable paging.
            hw_efer->lme = 0;
        } else {
            // Long mode was previously enabled. Ensure LMA bit is set.
            // VMX does not automatically set LMA, and this should not affect SVM.
            hw_efer->lma = 1;
        }
    }


    PrintDebug("RIP=%p\n", (void *)core->rip);
    PrintDebug("New EFER value HW(hi=%p), VM(hi=%p)\n", (void *)*(uint64_t *)hw_efer, (void *)vm_efer->value);


    return 0;
}
+
// MSR read handler for the SVM VM_CR register.
// Always reports SVM as disabled-and-locked to the guest, so nested SVM
// appears firmware-disabled rather than mysteriously unusable.
int v3_handle_vm_cr_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
    /* tell the guest that the BIOS disabled SVM, that way it doesn't get
     * confused by the fact that CPUID reports SVM as available but it still
     * cannot be used
     */
    dst->value = SVM_VM_CR_MSR_lock | SVM_VM_CR_MSR_svmdis;
    PrintDebug("VM_CR Read HI=%x LO=%x\n", dst->hi, dst->lo);
    return 0;
}
+
// MSR write handler for the SVM VM_CR register.
// LOCK/SVMDIS writes are dropped (matching the spec's silently-ignored
// semantics); any other bit indicates a feature we do not emulate, so the
// write is rejected with -1.
int v3_handle_vm_cr_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    PrintDebug("VM_CR Write\n");
    PrintDebug("VM_CR Write Values: HI=%x LO=%x\n", src.hi, src.lo);

    /* writes to LOCK and SVMDIS are silently ignored (according to the spec),
     * other writes indicate the guest wants to use some feature we haven't
     * implemented
     */
    if (src.value & ~(SVM_VM_CR_MSR_lock | SVM_VM_CR_MSR_svmdis)) {
        PrintDebug("VM_CR write sets unsupported bits: HI=%x LO=%x\n", src.hi, src.lo);
        return -1;
    }
    return 0;
}