From: Peter Dinda Date: Fri, 3 Aug 2012 16:47:30 +0000 (-0500) Subject: APIC and CR8 changes for vector prioritization vs TPR X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=ef99d19f9ee3ff28d07f83c240a557938b3ab0d5;p=palacios.git APIC and CR8 changes for vector prioritization vs TPR This patch does the following: - It moves the TPR into the guest core structure, replacing CR8 - It intercepts CR8 reads/writes to update the TPR accordingly - It moves prioritization into the APIC, away from the architecture-specific SVM or VMX handling - It computes APIC PPR and APR dynamically in the APIC - It does vector to PPR priority comparison (this was not done before at all) - It does vector to APR comparison for IPI delivery - It updates checkpointing, etc, to reflect these changes --- diff --git a/linux_module/iface-host-hypercall.c b/linux_module/iface-host-hypercall.c index 76f2f2d..b998b90 100644 --- a/linux_module/iface-host-hypercall.c +++ b/linux_module/iface-host-hypercall.c @@ -45,12 +45,12 @@ static int host_hypercall_nop(palacios_core_t core, (void*)(acc->get_r13(core)), (void*)(acc->get_r14(core)), (void*)(acc->get_r15(core))); - DEBUG(" cr0=%p\n cr2=%p\n cr3=%p\n cr4=%p\n cr8=%p\n efer=%p\n", + DEBUG(" cr0=%p\n cr2=%p\n cr3=%p\n cr4=%p\n apic_tpr=%p\n efer=%p\n", (void*)(acc->get_cr0(core)), (void*)(acc->get_cr2(core)), (void*)(acc->get_cr3(core)), (void*)(acc->get_cr4(core)), - (void*)(acc->get_cr8(core)), + (void*)(acc->get_apic_tpr(core)), (void*)(acc->get_efer(core))); return 0; } diff --git a/linux_module/palacios-vnet-brg.c b/linux_module/palacios-vnet-brg.c index efb7d18..00f0996 100644 --- a/linux_module/palacios-vnet-brg.c +++ b/linux_module/palacios-vnet-brg.c @@ -94,7 +94,7 @@ static inline struct vnet_link * _link_by_idx(int idx) { static void _delete_link(struct vnet_link * link){ - unsigned long flags; + unsigned long flags = 0; link->sock->ops->release(link->sock); @@ -122,7 +122,7 @@ void vnet_brg_delete_link(uint32_t idx){
} static void deinit_links_list(void){ - struct vnet_link * link, * tmp_link; + struct vnet_link * link = NULL, * tmp_link = NULL; list_for_each_entry_safe(link, tmp_link, &(vnet_brg_s.link_list), node) { _delete_link(link); @@ -296,7 +296,9 @@ send_to_palacios(unsigned char * buf, int len, int link_id){ struct v3_vnet_pkt pkt; + memset(&pkt,0,sizeof(struct v3_vnet_pkt)); pkt.size = len; + pkt.dst_type = LINK_NOSET; pkt.src_type = LINK_EDGE; pkt.src_id = link_id; memcpy(pkt.header, buf, ETHERNET_HEADER_LEN); @@ -321,7 +323,7 @@ static int bridge_send_pkt(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * private_data) { - struct vnet_link * link; + struct vnet_link * link = NULL; if(net_debug >= 2){ DEBUG("VNET Lnx Host Bridge: packet received from VNET Core ... pkt size: %d, link: %d\n", diff --git a/palacios/include/interfaces/vmm_host_hypercall.h b/palacios/include/interfaces/vmm_host_hypercall.h index 611e700..5fd2525 100644 --- a/palacios/include/interfaces/vmm_host_hypercall.h +++ b/palacios/include/interfaces/vmm_host_hypercall.h @@ -74,7 +74,7 @@ struct guest_accessors { GET_SET_REG_DECL(cr2) GET_SET_REG_DECL(cr3) GET_SET_REG_DECL(cr4) - GET_SET_REG_DECL(cr8) + GET_SET_REG_DECL(apic_tpr) GET_SET_REG_DECL(efer) int (*gva_to_hva)(palacios_core_t core, uint64_t gva, uint64_t *hva); diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 9e6ea21..468434f 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -139,7 +139,7 @@ struct guest_info { /* The virtual core # of this cpu (what the guest sees this core as) */ uint32_t vcpu_id; - + }; diff --git a/palacios/include/palacios/vmm_ctrl_regs.h b/palacios/include/palacios/vmm_ctrl_regs.h index e072a04..e487711 100644 --- a/palacios/include/palacios/vmm_ctrl_regs.h +++ b/palacios/include/palacios/vmm_ctrl_regs.h @@ -186,7 +186,6 @@ struct rflags { - /* // First opcode byte static const uchar_t cr_access_byte = 0x0f; @@ -211,6 
+210,9 @@ int v3_handle_cr3_read(struct guest_info * info); int v3_handle_cr4_write(struct guest_info * info); int v3_handle_cr4_read(struct guest_info * info); +int v3_handle_cr8_write(struct guest_info * info); +int v3_handle_cr8_read(struct guest_info * info); + int v3_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data); int v3_handle_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data); diff --git a/palacios/include/palacios/vmm_regs.h b/palacios/include/palacios/vmm_regs.h index 32a870e..e116d43 100644 --- a/palacios/include/palacios/vmm_regs.h +++ b/palacios/include/palacios/vmm_regs.h @@ -51,7 +51,7 @@ struct v3_ctrl_regs { v3_reg_t cr2; v3_reg_t cr3; v3_reg_t cr4; - v3_reg_t cr8; + v3_reg_t apic_tpr; // cr8 is (apic_tpr >> 4) & 0xf v3_reg_t rflags; v3_reg_t efer; }; diff --git a/palacios/include/palacios/vmx_ctrl_regs.h b/palacios/include/palacios/vmx_ctrl_regs.h index 789ff4e..441b1e5 100644 --- a/palacios/include/palacios/vmx_ctrl_regs.h +++ b/palacios/include/palacios/vmx_ctrl_regs.h @@ -33,6 +33,8 @@ int v3_vmx_handle_cr3_access(struct guest_info * info, struct vmx_exit_cr_qual * cr_qual); int v3_vmx_handle_cr4_access(struct guest_info * info, struct vmx_exit_cr_qual * cr_qual); +int v3_vmx_handle_cr8_access(struct guest_info * info, + struct vmx_exit_cr_qual * cr_qual); #endif diff --git a/palacios/src/devices/apic.c b/palacios/src/devices/apic.c index 9e636b6..5576499 100644 --- a/palacios/src/devices/apic.c +++ b/palacios/src/devices/apic.c @@ -215,9 +215,9 @@ struct apic_state { struct int_cmd_reg int_cmd; struct log_dst_reg log_dst; struct dst_fmt_reg dst_fmt; - struct arb_prio_reg arb_prio; - struct task_prio_reg task_prio; - struct proc_prio_reg proc_prio; + //struct arb_prio_reg arb_prio; // computed on the fly + //struct task_prio_reg task_prio; // stored in core.ctrl_regs.apic_tpr + //struct proc_prio_reg proc_prio; // computed on the fly struct ext_apic_feature_reg 
ext_apic_feature; struct spec_eoi_reg spec_eoi; @@ -280,6 +280,9 @@ struct apic_dev_state { static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data); static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data); +static void set_apic_tpr(struct apic_state *apic, uint32_t val); + + // No lcoking done static void init_apic_state(struct apic_state * apic, uint32_t id) { apic->base_addr = DEFAULT_BASE_ADDR; @@ -321,9 +324,10 @@ static void init_apic_state(struct apic_state * apic, uint32_t id) { // The P6 has 6 LVT entries, so we set the value to (6-1)... apic->apic_ver.val = 0x80050010; - apic->task_prio.val = 0x00000000; - apic->arb_prio.val = 0x00000000; - apic->proc_prio.val = 0x00000000; + set_apic_tpr(apic,0x00000000); + // note that arbitration priority and processor priority are derived values + // and are computed on the fly + apic->log_dst.val = 0x00000000; apic->dst_fmt.val = 0xffffffff; apic->spurious_int.val = 0x000000ff; @@ -501,7 +505,6 @@ static void drain_irq_entries(struct apic_state * apic) { - static int get_highest_isr(struct apic_state * apic) { int i = 0, j = 0; @@ -546,6 +549,88 @@ static int get_highest_irr(struct apic_state * apic) { } +static uint32_t get_isrv(struct apic_state *apic) +{ + int isr = get_highest_isr(apic); + + if (isr>=0) { + return (uint32_t) isr; + } else { + return 0; + } +} + +static uint32_t get_irrv(struct apic_state *apic) +{ + int irr = get_highest_irr(apic); + + if (irr>=0) { + return (uint32_t) irr; + } else { + return 0; + } +} + + +static uint32_t get_apic_tpr(struct apic_state *apic) +{ + return (uint32_t) (apic->core->ctrl_regs.apic_tpr); // see comment in vmm_ctrl_regs.c for how this works + +} + +static void set_apic_tpr(struct apic_state *apic, uint32_t val) +{ + PrintDebug("Set apic_tpr to 0x%x from apic reg path\n",val); + apic->core->ctrl_regs.apic_tpr = (uint64_t) val; // see comment in vmm_ctrl_regs.c 
for how this works +} + +static uint32_t get_apic_ppr(struct apic_state *apic) +{ + uint32_t tpr = get_apic_tpr(apic); + uint32_t isrv = get_isrv(apic); + uint32_t tprlevel, isrlevel; + uint32_t ppr; + + tprlevel = (tpr >> 4) & 0xf; + isrlevel = (isrv >> 4) & 0xf; + + if (tprlevel>=isrlevel) { + ppr = tpr; // get class and subclass + } else { + ppr = (isrlevel << 4); // get class only + } + + return ppr; +} + + + +static uint32_t get_apic_apr(struct apic_state *apic) +{ + uint32_t tpr = get_apic_tpr(apic); + uint32_t isrv = get_isrv(apic); + uint32_t irrv = get_irrv(apic); + uint32_t tprlevel, isrlevel, irrlevel; + + tprlevel = (tpr >> 4) & 0xf; + isrlevel = (isrv >> 4) & 0xf; + irrlevel = (irrv >> 4) & 0xf; + + if (tprlevel >= isrlevel) { + if (tprlevel >= irrlevel) { + return tpr; // get both class and subclass + } else { + return irrlevel << 4; // get class only + } + } else { + if (isrlevel >= irrlevel) { + return isrlevel << 4; // get class only + } else { + return irrlevel << 4; // get class only + } + } + +} static int apic_do_eoi(struct guest_info * core, struct apic_state * apic) { @@ -939,6 +1024,7 @@ static int route_ipi(struct apic_dev_state * apic_dev, } } else { // APIC_LOWEST_DELIVERY struct apic_state * cur_best_apic = NULL; + uint32_t cur_best_apr; uint8_t mda = ipi->dst; int i; @@ -963,8 +1049,13 @@ static int route_ipi(struct apic_dev_state * apic_dev, if (cur_best_apic == 0) { cur_best_apic = dest_apic; - } else if (dest_apic->task_prio.val < cur_best_apic->task_prio.val) { - cur_best_apic = dest_apic; + cur_best_apr = get_apic_apr(dest_apic) & 0xf0; + } else { + uint32_t dest_apr = get_apic_apr(dest_apic) & 0xf0; + if (dest_apr < cur_best_apr) { + cur_best_apic = dest_apic; + cur_best_apr = dest_apr; + } } v3_unlock_irqrestore(apic_dev->state_lock, flags); @@ -1081,13 +1172,13 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui val = apic->apic_ver.val; break; case TPR_OFFSET: - val = apic->task_prio.val; + val = 
get_apic_tpr(apic); break; case APR_OFFSET: - val = apic->arb_prio.val; + val = get_apic_apr(apic); break; case PPR_OFFSET: - val = apic->proc_prio.val; + val = get_apic_ppr(apic); break; case REMOTE_READ_OFFSET: val = apic->rem_rd_data; @@ -1375,7 +1466,7 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u apic->lapic_id.val = op_val; break; case TPR_OFFSET: - apic->task_prio.val = op_val; + set_apic_tpr(apic,op_val); break; case LDR_OFFSET: PrintDebug("apic %u: core %u: setting log_dst.val to 0x%x\n", @@ -1543,12 +1634,33 @@ static int apic_intr_pending(struct guest_info * core, void * private_data) { // PrintDebug("apic %u: core %u: req_irq=%d, svc_irq=%d\n",apic->lapic_id.val,info->vcpu_id,req_irq,svc_irq); + if ((req_irq >= 0) && (req_irq > svc_irq)) { - return 1; - } - return 0; + // We have a new requested vector that is higher priority than + // the vector that is in-service + + uint32_t ppr = get_apic_ppr(apic); + + if ((req_irq & 0xf0) > (ppr & 0xf0)) { + // it's also higher priority than the current + // processor priority. Therefore this + // interrupt can go in now. + return 1; + } else { + // processor priority is currently too high + // for this interrupt to go in now. + // note that if tpr=0xf?, then ppr=0xf? + // and thus all vectors will be masked + // as required (tpr=0xf? 
=> all masked) + return 0; + } + } else { + // the vector that is in service is higher + // priority than any new requested vector + return 0; + } } @@ -1559,13 +1671,23 @@ static int apic_get_intr_number(struct guest_info * core, void * private_data) { int req_irq = get_highest_irr(apic); int svc_irq = get_highest_isr(apic); - if (svc_irq == -1) { - return req_irq; - } else if (svc_irq < req_irq) { - return req_irq; - } - return -1; + // for the logic here, see the comments for apic_intr_pending + if ((req_irq >=0) && + (req_irq > svc_irq)) { + + uint32_t ppr = get_apic_ppr(apic); + + if ((req_irq & 0xf0) > (ppr & 0xf0)) { + return req_irq; + } else { + // hmm, this should not have happened, but, anyway, + // no interrupt is currently ready to go in + return -1; + } + } else { + return -1; + } } @@ -1579,7 +1701,6 @@ int v3_apic_send_ipi(struct v3_vm_info * vm, struct v3_gen_ipi * ipi, void * dev - static int apic_begin_irq(struct guest_info * core, void * private_data, int irq) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data); struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); @@ -1765,6 +1886,7 @@ static int apic_free(struct apic_dev_state * apic_dev) { static int apic_save(struct v3_chkpt_ctx * ctx, void * private_data) { struct apic_dev_state * apic_state = (struct apic_dev_state *)private_data; int i = 0; + uint32_t temp; V3_CHKPT_STD_SAVE(ctx, apic_state->num_apics); @@ -1789,9 +1911,17 @@ static int apic_save(struct v3_chkpt_ctx * ctx, void * private_data) { V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].int_cmd); V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].log_dst); V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].dst_fmt); - V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].arb_prio); - V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].task_prio); - V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].proc_prio); + + // APR and PPR are stored only for compatability + // TPR is in APIC_TPR, APR and PPR are derived + + temp = 
get_apic_apr(&(apic_state->apics[i])); + V3_CHKPT_STD_SAVE(ctx, temp); + temp = get_apic_tpr(&(apic_state->apics[i])); + V3_CHKPT_STD_SAVE(ctx, temp); + temp = get_apic_ppr(&(apic_state->apics[i])); + V3_CHKPT_STD_SAVE(ctx, temp); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].ext_apic_feature); V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].spec_eoi); V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].tmr_cur_cnt); @@ -1813,6 +1943,7 @@ static int apic_save(struct v3_chkpt_ctx * ctx, void * private_data) { static int apic_load(struct v3_chkpt_ctx * ctx, void * private_data) { struct apic_dev_state *apic_state = (struct apic_dev_state *)private_data; int i = 0; + uint32_t temp; V3_CHKPT_STD_LOAD(ctx,apic_state->num_apics); @@ -1835,9 +1966,15 @@ static int apic_load(struct v3_chkpt_ctx * ctx, void * private_data) { V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].int_cmd); V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].log_dst); V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].dst_fmt); - V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].arb_prio); - V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].task_prio); - V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].proc_prio); + + // APR is ignored + V3_CHKPT_STD_LOAD(ctx, temp); + // TPR is written back to APIC_TPR + V3_CHKPT_STD_LOAD(ctx, temp); + set_apic_tpr(&(apic_state->apics[i]),temp); + // PPR is ignored + V3_CHKPT_STD_LOAD(ctx, temp); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].ext_apic_feature); V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].spec_eoi); V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].tmr_cur_cnt); diff --git a/palacios/src/interfaces/vmm_host_hypercall.c b/palacios/src/interfaces/vmm_host_hypercall.c index ab57698..a053184 100644 --- a/palacios/src/interfaces/vmm_host_hypercall.c +++ b/palacios/src/interfaces/vmm_host_hypercall.c @@ -64,7 +64,7 @@ GET_SET_CR_IMPL(cr0) GET_SET_CR_IMPL(cr2) GET_SET_CR_IMPL(cr3) GET_SET_CR_IMPL(cr4) -GET_SET_CR_IMPL(cr8) +GET_SET_CR_IMPL(apic_tpr) GET_SET_CR_IMPL(efer) GET_SET_CR_IMPL(rflags) @@ -93,7 +93,7 
@@ DECL_IT(cr0) DECL_IT(cr2) DECL_IT(cr3) DECL_IT(cr4) -DECL_IT(cr8) +DECL_IT(apic_tpr) DECL_IT(efer) DECL_IT(rflags) diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index 996ed2b..a4e9443 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -243,8 +243,20 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { PrintDebug("Exiting on interrupts\n"); ctrl_area->guest_ctrl.V_INTR_MASKING = 1; ctrl_area->instrs.INTR = 1; + // The above also assures the TPR changes (CR8) are only virtual + // However, we need to see TPR writes since they will + // affect the virtual apic + // we reflect out cr8 to ctrl_regs->apic_tpr + ctrl_area->cr_reads.cr8 = 1; + ctrl_area->cr_writes.cr8 = 1; + // We will do all TPR comparisons in the virtual apic + // We also do not want the V_TPR to be able to mask the PIC + ctrl_area->guest_ctrl.V_IGN_TPR = 1; + + + v3_hook_msr(core->vm_info, EFER_MSR, &v3_handle_efer_read, &v3_svm_handle_efer_write, @@ -279,7 +291,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { ctrl_area->cr_writes.cr3 = 1; - ctrl_area->instrs.INVLPG = 1; ctrl_area->exceptions.pf = 1; @@ -287,7 +298,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { guest_state->g_pat = 0x7040600070406ULL; - } else if (core->shdw_pg_mode == NESTED_PAGING) { // Flush the TLB on entries/exits ctrl_area->TLB_CONTROL = 1; @@ -477,8 +487,11 @@ static int update_irq_entry_state(struct guest_info * info) { #endif guest_ctrl->guest_ctrl.V_IRQ = 1; guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector; + + // We ignore the virtual TPR on this injection + // TPR/PPR tests have already been done in the APIC. 
guest_ctrl->guest_ctrl.V_IGN_TPR = 1; - guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf; + guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ; // 0xf; } else { switch (v3_intr_pending(info)) { @@ -487,8 +500,11 @@ static int update_irq_entry_state(struct guest_info * info) { guest_ctrl->guest_ctrl.V_IRQ = 1; guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq; + + // We ignore the virtual TPR on this injection + // TPR/PPR tests have already been done in the APIC. guest_ctrl->guest_ctrl.V_IGN_TPR = 1; - guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf; + guest_ctrl->guest_ctrl.V_INTR_PRIO = irq >> 4 ; // 0xf; #ifdef V3_CONFIG_DEBUG_INTERRUPTS PrintDebug("Injecting Interrupt %d (EIP=%p)\n", @@ -587,7 +603,17 @@ int v3_svm_enter(struct guest_info * info) { guest_state->cr4 = info->ctrl_regs.cr4; guest_state->dr6 = info->dbg_regs.dr6; guest_state->dr7 = info->dbg_regs.dr7; - guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff; + + // CR8 is now updated by read/writes and it contains the APIC TPR + // the V_TPR should be just the class part of that. + // This update is here just for completeness. We currently + // are ignoring V_TPR on all injections and doing the priority logic + // in the APIC. 
+ // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf; + + //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff; + // + guest_state->rflags = info->ctrl_regs.rflags; guest_state->efer = info->ctrl_regs.efer; @@ -668,7 +694,11 @@ int v3_svm_enter(struct guest_info * info) { info->ctrl_regs.cr4 = guest_state->cr4; info->dbg_regs.dr6 = guest_state->dr6; info->dbg_regs.dr7 = guest_state->dr7; - info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR; + // + // We do not track this anymore + // V_TPR is ignored and we do the logic in the APIC + //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR; + // info->ctrl_regs.rflags = guest_state->rflags; info->ctrl_regs.efer = guest_state->efer; diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c index 14f4454..4ea49c9 100644 --- a/palacios/src/palacios/svm_handler.c +++ b/palacios/src/palacios/svm_handler.c @@ -165,6 +165,25 @@ int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_i return -1; } break; + + case SVM_EXIT_CR8_WRITE: +#ifdef V3_CONFIG_DEBUG_CTRL_REGS + PrintDebug("CR8 Write\n"); +#endif + if (v3_handle_cr8_write(info) == -1) { + return -1; + } + break; + + case SVM_EXIT_CR8_READ: +#ifdef V3_CONFIG_DEBUG_CTRL_REGS + PrintDebug("CR8 Read\n"); +#endif + if (v3_handle_cr8_read(info) == -1) { + return -1; + } + break; + case SVM_EXIT_EXCP14: { addr_t fault_addr = exit_info2; pf_error_t * error_code = (pf_error_t *)&(exit_info1); diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index ba4a8b0..ee85811 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -730,7 +730,7 @@ void v3_yield_cond(struct guest_info * info, int usec) { info->yield_start_cycle += info->vm_info->yield_cycle_period; } } - + /* * unconditional cpu yield diff --git a/palacios/src/palacios/vmm_checkpoint.c b/palacios/src/palacios/vmm_checkpoint.c index e2baf42..9a49149 100644 --- a/palacios/src/palacios/vmm_checkpoint.c +++ 
b/palacios/src/palacios/vmm_checkpoint.c @@ -609,7 +609,7 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt) { V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.cr0); V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.cr2); V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.cr4); - V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.cr8); + V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.apic_tpr); V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.rflags); V3_CHKPT_STD_LOAD(ctx, info->ctrl_regs.efer); @@ -727,7 +727,7 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt) { V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.cr0); V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.cr2); V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.cr4); - V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.cr8); + V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.apic_tpr); V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.rflags); V3_CHKPT_STD_SAVE(ctx, info->ctrl_regs.efer); diff --git a/palacios/src/palacios/vmm_ctrl_regs.c b/palacios/src/palacios/vmm_ctrl_regs.c index c80f605..acf95a7 100644 --- a/palacios/src/palacios/vmm_ctrl_regs.c +++ b/palacios/src/palacios/vmm_ctrl_regs.c @@ -549,6 +549,119 @@ int v3_handle_cr4_write(struct guest_info * info) { } +/* + The CR8 and APIC TPR interaction are kind of crazy. 
+ + CR8 mandates that the priority class is in bits 3:0 + + The interaction of CR8 and an actual APIC is somewhat implementation dependent, but + a basic current APIC has the priority class at 7:4 and the *subclass* at 3:0 + + The APIC TPR (both fields) can be written as the APIC register + A write to CR8 sets the priority class field, and should zero the subclass + A read from CR8 gets just the priority class field + + In the apic_tpr storage location, we have: + + zeros [class] [subclass] + + Because of this, an APIC implementation should use apic_tpr to store its TPR + In fact, it *should* do this, otherwise its TPR may get out of sync with the architected TPR + + On a CR8 read, we return just + + zeros 0000 [class] + + On a CR8 write, we set the register to + + zeros [class] 0000 + +*/ + +int v3_handle_cr8_write(struct guest_info * info) { + int ret; + uchar_t instr[15]; + struct x86_instr dec_instr; + + if (info->mem_mode == PHYSICAL_MEM) { + ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + } else { + ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + } + + if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { + PrintError("Could not decode instruction\n"); + return -1; + } + + if (dec_instr.op_type == V3_OP_MOV2CR) { + PrintDebug("MOV2CR8 (cpu_mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode)); + + if ((info->cpu_mode == LONG) || + (info->cpu_mode == LONG_32_COMPAT)) { + uint64_t *val = (uint64_t *)(dec_instr.src_operand.operand); + + info->ctrl_regs.apic_tpr = (*val & 0xf) << 4; + + V3_Print("Write of CR8 sets apic_tpr to 0x%llx\n",info->ctrl_regs.apic_tpr); + + } else { + // probably should raise exception here + } + } else { + PrintError("Unhandled opcode in handle_cr8_write\n"); + return -1; + } + + info->rip += dec_instr.instr_length; + + return 0; +} + + + +int v3_handle_cr8_read(struct guest_info * info) { + uchar_t instr[15]; + int ret; + struct 
x86_instr dec_instr; + + if (info->mem_mode == PHYSICAL_MEM) { + ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + } else { + ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + } + + if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { + PrintError("Could not decode instruction\n"); + return -1; + } + + if (dec_instr.op_type == V3_OP_MOVCR2) { + PrintDebug("MOVCR82 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode)); + + if ((info->cpu_mode == LONG) || + (info->cpu_mode == LONG_32_COMPAT)) { + uint64_t *dst_reg = (uint64_t *)(dec_instr.dst_operand.operand); + + *dst_reg = (info->ctrl_regs.apic_tpr >> 4) & 0xf; + + V3_Print("Read of CR8 (apic_tpr) returns 0x%llx\n",*dst_reg); + + } else { + // probably should raise exception + } + + } else { + PrintError("Unhandled opcode in handle_cr8_read\n"); + return -1; + } + + info->rip += dec_instr.instr_length; + + return 0; +} + + int v3_handle_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) { PrintDebug("EFER Read HI=%x LO=%x\n", core->shdw_pg_state.guest_efer.hi, core->shdw_pg_state.guest_efer.lo); diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index ed5140f..3fede2b 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -187,6 +187,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) vmx_state->pin_ctrls.ext_int_exit = 1; + /* We enable the preemption timer by default to measure accurate guest time */ if (avail_pin_ctrls.active_preempt_timer) { V3_Print("VMX Preemption Timer is available\n"); @@ -239,6 +240,11 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) // Setup Guests initial PAT field vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL); + // Capture CR8 mods so that we can keep the apic_tpr correct + vmx_state->pri_proc_ctrls.cr8_ld_exit = 1; + 
vmx_state->pri_proc_ctrls.cr8_str_exit = 1; + + /* Setup paging */ if (core->shdw_pg_mode == SHADOW_PAGING) { PrintDebug("Creating initial shadow page table\n"); @@ -524,6 +530,7 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) #endif + if (v3_update_vmcs_ctrl_fields(core)) { diff --git a/palacios/src/palacios/vmx_ctrl_regs.c b/palacios/src/palacios/vmx_ctrl_regs.c index 01c5dfe..2b0bfb5 100644 --- a/palacios/src/palacios/vmx_ctrl_regs.c +++ b/palacios/src/palacios/vmx_ctrl_regs.c @@ -100,6 +100,28 @@ int v3_vmx_handle_cr4_access(struct guest_info * info, struct vmx_exit_cr_qual * return -1; } +int v3_vmx_handle_cr8_access(struct guest_info * info, struct vmx_exit_cr_qual * cr_qual) { + if (cr_qual->access_type < 2) { + + if (cr_qual->access_type == 0) { + if (v3_handle_cr8_write(info) != 0) { + PrintError("Could not handle CR8 write\n"); + return -1; + } + } else { + if (v3_handle_cr8_read(info) != 0) { + PrintError("Could not handle CR8 read\n"); + return -1; + } + } + + return 0; + } + + PrintError("Invalid CR8 Access type?? (type=%d)\n", cr_qual->access_type); + return -1; +} + static int handle_mov_to_cr3(struct guest_info * info, v3_reg_t * cr3_reg) { if (info->shdw_pg_mode == SHADOW_PAGING) { diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c index 0cf500f..8f7665d 100644 --- a/palacios/src/palacios/vmx_handler.c +++ b/palacios/src/palacios/vmx_handler.c @@ -237,6 +237,12 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf return -1; } break; + case 8: + if (v3_vmx_handle_cr8_access(info, cr_qual) == -1) { + PrintError("Error in CR8 access handler\n"); + return -1; + } + break; default: PrintError("Unhandled CR access: %d\n", cr_qual->cr_id); return -1;