X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=blobdiff_plain;f=palacios%2Fsrc%2Fdevices%2Fapic.c;h=6b21f81ae48193d026c5cd0d96f9f7b1c03f098f;hp=ca991a303a173af40d6ed0a19c7b1ff7d82d3c03;hb=5d1bbcc86de011e3f0d115b6f10fd8645cdf855e;hpb=bba68c448a941e0beb562f670f1a5901e5d07a54 diff --git a/palacios/src/devices/apic.c b/palacios/src/devices/apic.c index ca991a3..6b21f81 100644 --- a/palacios/src/devices/apic.c +++ b/palacios/src/devices/apic.c @@ -28,17 +28,27 @@ #include -// -// MUST DO APIC SCAN FOR PHYSICAL DELIVERY -// +#include +#include + +/* The locking in this file is nasty. + * There are 3 different locking approaches that are taken, depending on the APIC operation + * 1. Queue locks. Actual irq insertions are done via queueing irq ops at the dest apic. + * The destination apic's core is responsible for draining the queue, and actually + * setting the vector table. + * 2. State lock. This is a standard lock taken when internal apic state is read/written. + * When an irq's destination is determined this lock is taken to examine the apic's + * addressability. + * 3. VM barrier lock. This is taken when actual VM core state is changed (via SIPI). + */ -#ifndef CONFIG_DEBUG_APIC +#ifndef V3_CONFIG_DEBUG_APIC #undef PrintDebug #define PrintDebug(fmt, args...) - #else + static char * shorthand_str[] = { "(no shorthand)", "(self)", @@ -59,7 +69,6 @@ static char * deliverymode_str[] = { #endif - typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT, APIC_LINT0_INT, APIC_LINT1_INT, APIC_ERR_INT } apic_irq_type_t; @@ -170,6 +179,7 @@ struct apic_msr { + typedef enum {INIT_ST, SIPI, STARTED} ipi_state_t; @@ -230,6 +240,9 @@ struct apic_state { struct v3_timer * timer; + + struct v3_queue irq_queue; + uint32_t eoi; @@ -241,6 +254,8 @@ struct apic_state { struct apic_dev_state { int num_apics; + v3_lock_t state_lock; + struct apic_state apics[0]; } __attribute__((packed)); @@ -309,6 +324,9 @@ static void init_apic_state(struct apic_state * apic, uint32_t id) { apic->spec_eoi.val = 0x00000000; + v3_init_queue(&(apic->irq_queue)); + + } @@ -317,9 +335,9 @@ static void init_apic_state(struct apic_state * apic, uint32_t id) { static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data; - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); - PrintDebug("apic %u: core %u: MSR read\n", apic->lapic_id.val, core->cpu_id); + PrintDebug("apic %u: core %u: MSR read\n", apic->lapic_id.val, core->vcpu_id); dst->value = apic->base_addr; @@ -329,16 +347,16 @@ static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, v static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data; - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); - struct v3_mem_region * old_reg = v3_get_mem_region(core->vm_info, core->cpu_id, apic->base_addr); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); + struct v3_mem_region * old_reg = v3_get_mem_region(core->vm_info, core->vcpu_id, apic->base_addr); - PrintDebug("apic %u: core %u: MSR write\n", apic->lapic_id.val, core->cpu_id); + PrintDebug("apic %u: core %u: MSR write\n", apic->lapic_id.val, core->vcpu_id); if (old_reg == NULL) { // uh oh... PrintError("apic %u: core %u: APIC Base address region does not exit...\n", - apic->lapic_id.val, core->cpu_id); + apic->lapic_id.val, core->vcpu_id); return -1; } @@ -348,11 +366,11 @@ static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, vo apic->base_addr = src.value; - if (v3_hook_full_mem(core->vm_info, core->cpu_id, apic->base_addr, + if (v3_hook_full_mem(core->vm_info, core->vcpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev) == -1) { PrintError("apic %u: core %u: Could not hook new APIC Base address\n", - apic->lapic_id.val, core->cpu_id); + apic->lapic_id.val, core->vcpu_id); return -1; } @@ -362,11 +380,10 @@ static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, vo } + + + // irq_num is the bit offset into a 256 bit buffer... -// return values -// -1 = error -// 0 = OK, no interrupt needed now -// 1 = OK, interrupt needed now static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) { int major_offset = (irq_num & ~0x00000007) >> 3; int minor_offset = irq_num & 0x00000007; @@ -375,14 +392,7 @@ static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) { uint8_t flag = 0x1 << minor_offset; - if (irq_num <= 15 || irq_num>255) { - PrintError("apic %u: core %d: Attempting to raise an invalid interrupt: %d\n", - apic->lapic_id.val, apic->core->cpu_id, irq_num); - return -1; - } - - - PrintDebug("apic %u: core %d: Raising APIC IRQ %d\n", apic->lapic_id.val, apic->core->cpu_id, irq_num); + PrintDebug("apic %u: core %d: Raising APIC IRQ %d\n", apic->lapic_id.val, apic->core->vcpu_id, irq_num); if (*req_location & flag) { PrintDebug("Interrupt %d coallescing\n", irq_num); @@ -394,13 +404,37 @@ static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) { return 1; } else { PrintDebug("apic %u: core %d: Interrupt not enabled... %.2x\n", - apic->lapic_id.val, apic->core->cpu_id,*en_location); + apic->lapic_id.val, apic->core->vcpu_id, *en_location); } return 0; } +static int add_apic_irq_entry(struct apic_state * apic, uint8_t irq_num) { + + if (irq_num <= 15) { + PrintError("core %d: Attempting to raise an invalid interrupt: %d\n", + apic->core->vcpu_id, irq_num); + return -1; + } + + v3_enqueue(&(apic->irq_queue), (addr_t)irq_num); + + return 0; +} + +static void drain_irq_entries(struct apic_state * apic) { + uint32_t irq = 0; + + while ((irq = (uint32_t)v3_dequeue(&(apic->irq_queue))) != 0) { + activate_apic_irq(apic, irq); + } + +} + + + static int get_highest_isr(struct apic_state * apic) { int i = 0, j = 0; @@ -460,7 +494,7 @@ static int apic_do_eoi(struct apic_state * apic) { *svc_location &= ~flag; -#ifdef CONFIG_CRAY_XT +#ifdef V3_CONFIG_CRAY_XT if ((isr_irq == 238) || (isr_irq == 239)) { @@ -529,7 +563,7 @@ static int activate_internal_irq(struct apic_state * apic, apic_irq_type_t int_t if (del_mode == APIC_FIXED_DELIVERY) { //PrintDebug("Activating internal APIC IRQ %d\n", vec_num); - return activate_apic_irq(apic, vec_num); + return add_apic_irq_entry(apic, vec_num); } else { PrintError("apic %u: core ?: Unhandled Delivery Mode\n", apic->lapic_id.val); return -1; @@ -538,77 +572,108 @@ static int activate_internal_irq(struct apic_state * apic, apic_irq_type_t int_t -static inline int should_deliver_cluster_ipi(struct guest_info * dst_core, +static inline int should_deliver_cluster_ipi(struct apic_dev_state * apic_dev, + struct guest_info * dst_core, struct apic_state * dst_apic, uint8_t mda) { + int ret = 0; + + if ( ((mda & 0xf0) == (dst_apic->log_dst.dst_log_id & 0xf0)) && /* (I am in the cluster and */ ((mda & 0x0f) & (dst_apic->log_dst.dst_log_id & 0x0f)) ) { /* I am in the set) */ + ret = 1; + } else { + ret = 0; + } + + if (ret == 1) { PrintDebug("apic %u core %u: accepting clustered IRQ (mda 0x%x == log_dst 0x%x)\n", - dst_apic->lapic_id.val, dst_core->cpu_id, mda, + dst_apic->lapic_id.val, dst_core->vcpu_id, mda, dst_apic->log_dst.dst_log_id); - - return 1; } else { PrintDebug("apic %u core %u: rejecting clustered IRQ (mda 0x%x != log_dst 0x%x)\n", - dst_apic->lapic_id.val, dst_core->cpu_id, mda, + dst_apic->lapic_id.val, dst_core->vcpu_id, mda, dst_apic->log_dst.dst_log_id); - return 0; } + + return ret; + } -static inline int should_deliver_flat_ipi(struct guest_info * dst_core, +static inline int should_deliver_flat_ipi(struct apic_dev_state * apic_dev, + struct guest_info * dst_core, struct apic_state * dst_apic, uint8_t mda) { - if (dst_apic->log_dst.dst_log_id & mda) { // I am in the set + int ret = 0; - PrintDebug("apic %u core %u: accepting flat IRQ (mda 0x%x == log_dst 0x%x)\n", - dst_apic->lapic_id.val, dst_core->cpu_id, mda, - dst_apic->log_dst.dst_log_id); - return 1; + if ((dst_apic->log_dst.dst_log_id & mda) != 0) { // I am in the set + ret = 1; + } else { + ret = 0; + } - } else { + if (ret == 1) { + PrintDebug("apic %u core %u: accepting flat IRQ (mda 0x%x == log_dst 0x%x)\n", + dst_apic->lapic_id.val, dst_core->vcpu_id, mda, + dst_apic->log_dst.dst_log_id); + } else { PrintDebug("apic %u core %u: rejecting flat IRQ (mda 0x%x != log_dst 0x%x)\n", - dst_apic->lapic_id.val, dst_core->cpu_id, mda, + dst_apic->lapic_id.val, dst_core->vcpu_id, mda, dst_apic->log_dst.dst_log_id); - return 0; - } + } + + + return ret; } -static int should_deliver_ipi(struct guest_info * dst_core, +static int should_deliver_ipi(struct apic_dev_state * apic_dev, + struct guest_info * dst_core, struct apic_state * dst_apic, uint8_t mda) { + addr_t flags = 0; + int ret = 0; + flags = v3_lock_irqsave(apic_dev->state_lock); if (dst_apic->dst_fmt.model == 0xf) { if (mda == 0xff) { /* always deliver broadcast */ - return 1; + ret = 1; + } else { + ret = should_deliver_flat_ipi(apic_dev, dst_core, dst_apic, mda); } - - return should_deliver_flat_ipi(dst_core, dst_apic, mda); - } else if (dst_apic->dst_fmt.model == 0x0) { if (mda == 0xff) { /* always deliver broadcast */ - return 1; + ret = 1; + } else { + ret = should_deliver_cluster_ipi(apic_dev, dst_core, dst_apic, mda); } - return should_deliver_cluster_ipi(dst_core, dst_apic, mda); - } else { + ret = -1; + } + + v3_unlock_irqrestore(apic_dev->state_lock, flags); + + + if (ret == -1) { PrintError("apic %u core %u: invalid destination format register value 0x%x for logical mode delivery.\n", - dst_apic->lapic_id.val, dst_core->cpu_id, dst_apic->dst_fmt.model); - return -1; + dst_apic->lapic_id.val, dst_core->vcpu_id, dst_apic->dst_fmt.model); } + + return ret; } + + // Only the src_apic pointer is used static int deliver_ipi(struct apic_state * src_apic, struct apic_state * dst_apic, @@ -625,44 +690,30 @@ static int deliver_ipi(struct apic_state * src_apic, // lowest priority - // caller needs to have decided which apic to deliver to! - int do_xcall; - - PrintDebug("delivering IRQ %d to core %u\n", vector, dst_core->cpu_id); + PrintDebug("delivering IRQ %d to core %u\n", vector, dst_core->vcpu_id); - do_xcall = activate_apic_irq(dst_apic, vector); + add_apic_irq_entry(dst_apic, vector); - if (do_xcall < 0) { - PrintError("Failed to activate apic irq!\n"); - return -1; - } - - if (do_xcall && (dst_apic != src_apic)) { - // Assume core # is same as logical processor for now - // TODO FIX THIS FIX THIS - // THERE SHOULD BE: guestapicid->virtualapicid map, - // cpu_id->logical processor map - // host maitains logical proc->phsysical proc +#ifdef V3_CONFIG_MULTITHREAD_OS + if (dst_apic != src_apic) { PrintDebug(" non-local core with new interrupt, forcing it to exit now\n"); - -#ifdef CONFIG_MULTITHREAD_OS - v3_interrupt_cpu(dst_core->vm_info, dst_core->cpu_id, 0); -#else - V3_ASSERT(0); -#endif + v3_interrupt_cpu(dst_core->vm_info, dst_core->pcpu_id, 0); } +#endif + break; } case APIC_INIT_DELIVERY: { - PrintDebug(" INIT delivery to core %u\n", dst_core->cpu_id); + PrintDebug(" INIT delivery to core %u\n", dst_core->vcpu_id); // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...) // Sanity check if (dst_apic->ipi_state != INIT_ST) { PrintError(" Warning: core %u is not in INIT state (mode = %d), ignored (assuming this is the deassert)\n", - dst_core->cpu_id, dst_apic->ipi_state); + dst_core->vcpu_id, dst_apic->ipi_state); // Only a warning, since INIT INIT SIPI is common break; } @@ -685,49 +736,70 @@ static int deliver_ipi(struct apic_state * src_apic, // Sanity check if (dst_apic->ipi_state != SIPI) { PrintError(" core %u is not in SIPI state (mode = %d), ignored!\n", - dst_core->cpu_id, dst_apic->ipi_state); + dst_core->vcpu_id, dst_apic->ipi_state); break; } - // Write the RIP, CS, and descriptor - // assume the rest is already good to go - // - // vector VV -> rip at 0 - // CS = VV00 - // This means we start executing at linear address VV000 - // - // So the selector needs to be VV00 - // and the base needs to be VV000 - // - dst_core->rip = 0; - dst_core->segments.cs.selector = vector << 8; - dst_core->segments.cs.limit = 0xffff; - dst_core->segments.cs.base = vector << 12; + v3_reset_vm_core(dst_core, vector); PrintDebug(" SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n", - vector, dst_core->segments.cs.selector, dst_core->cpu_id); + vector, dst_core->segments.cs.selector, dst_core->vcpu_id); // Maybe need to adjust the APIC? // We transition the target core to SIPI state dst_core->core_run_state = CORE_RUNNING; // note: locking should not be needed here dst_apic->ipi_state = STARTED; - + // As with INIT, we should not need to do anything else - + PrintDebug(" SIPI delivery done\n"); - + break; } + + case APIC_EXTINT_DELIVERY: // EXTINT + /* Two possible things to do here: + * 1. Ignore the IPI and assume the 8259a (PIC) will handle it + * 2. Add 32 to the vector and inject it... + * We probably just want to do 1 here, and assume the raise_irq() will hit the 8259a. + */ + return 0; + case APIC_SMI_DELIVERY: case APIC_RES1_DELIVERY: // reserved case APIC_NMI_DELIVERY: - case APIC_EXTINT_DELIVERY: // ExtInt default: PrintError("IPI %d delivery is unsupported\n", del_mode); return -1; } - + return 0; + +} + +static struct apic_state * find_physical_apic(struct apic_dev_state * apic_dev, uint32_t dst_idx) { + struct apic_state * dst_apic = NULL; + addr_t flags; + int i; + + flags = v3_lock_irqsave(apic_dev->state_lock); + + if ( (dst_idx > 0) && (dst_idx < apic_dev->num_apics) ) { + // see if it simply is the core id + if (apic_dev->apics[dst_idx].lapic_id.val == dst_idx) { + dst_apic = &(apic_dev->apics[dst_idx]); + } + } + + for (i = 0; i < apic_dev->num_apics; i++) { + if (apic_dev->apics[i].lapic_id.val == dst_idx) { + dst_apic = &(apic_dev->apics[i]); + } + } + + v3_unlock_irqrestore(apic_dev->state_lock, flags); + + return dst_apic; } @@ -747,48 +819,52 @@ static int route_ipi(struct apic_dev_state * apic_dev, icr->dst, icr->val); + switch (icr->dst_shorthand) { case APIC_SHORTHAND_NONE: // no shorthand if (icr->dst_mode == APIC_DEST_PHYSICAL) { - if (icr->dst >= apic_dev->num_apics) { + dest_apic = find_physical_apic(apic_dev, icr->dst); + + if (dest_apic == NULL) { PrintError("apic: Attempted send to unregistered apic id=%u\n", icr->dst); return -1; } - dest_apic = &(apic_dev->apics[icr->dst]); - - V3_Print("apic: phsyical destination of %u (apic %u at 0x%p)\n", icr->dst,dest_apic->lapic_id.val,dest_apic); - if (deliver_ipi(src_apic, dest_apic, icr->vec, icr->del_mode) == -1) { PrintError("apic: Could not deliver IPI\n"); return -1; } - V3_Print("apic: done\n"); + + PrintDebug("apic: done\n"); } else if (icr->dst_mode == APIC_DEST_LOGICAL) { - if (icr->del_mode!=APIC_LOWEST_DELIVERY ) { + if (icr->del_mode != APIC_LOWEST_DELIVERY) { + int i; + uint8_t mda = icr->dst; + // logical, but not lowest priority // we immediately trigger // fixed, smi, reserved, nmi, init, sipi, etc - int i; - - uint8_t mda = icr->dst; + for (i = 0; i < apic_dev->num_apics; i++) { + int del_flag = 0; dest_apic = &(apic_dev->apics[i]); - int del_flag = should_deliver_ipi(dest_apic->core, dest_apic, mda); + del_flag = should_deliver_ipi(apic_dev, dest_apic->core, dest_apic, mda); if (del_flag == -1) { + PrintError("apic: Error checking delivery mode\n"); return -1; } else if (del_flag == 1) { + if (deliver_ipi(src_apic, dest_apic, icr->vec, icr->del_mode) == -1) { PrintError("apic: Error: Could not deliver IPI\n"); @@ -796,18 +872,19 @@ static int route_ipi(struct apic_dev_state * apic_dev, } } } - } else { //APIC_LOWEST_DELIVERY - // logical, lowest priority - int i; + } else { // APIC_LOWEST_DELIVERY struct apic_state * cur_best_apic = NULL; uint8_t mda = icr->dst; - + int i; + + // logical, lowest priority + for (i = 0; i < apic_dev->num_apics; i++) { int del_flag = 0; dest_apic = &(apic_dev->apics[i]); - del_flag = should_deliver_ipi(dest_apic->core, dest_apic, mda); + del_flag = should_deliver_ipi(apic_dev, dest_apic->core, dest_apic, mda); if (del_flag == -1) { PrintError("apic: Error checking delivery mode\n"); @@ -815,11 +892,18 @@ static int route_ipi(struct apic_dev_state * apic_dev, return -1; } else if (del_flag == 1) { // update priority for lowest priority scan - if (!cur_best_apic) { + addr_t flags = 0; + + flags = v3_lock_irqsave(apic_dev->state_lock); + + if (cur_best_apic == 0) { cur_best_apic = dest_apic; } else if (dest_apic->task_prio.val < cur_best_apic->task_prio.val) { cur_best_apic = dest_apic; } + + v3_unlock_irqrestore(apic_dev->state_lock, flags); + } } @@ -855,6 +939,7 @@ static int route_ipi(struct apic_dev_state * apic_dev, } } else if (icr->dst_mode == APIC_DEST_LOGICAL) { /* logical delivery */ PrintError("apic: use of logical delivery in self (untested)\n"); + if (deliver_ipi(src_apic, src_apic, icr->vec, icr->del_mode) == -1) { PrintError("apic: Could not deliver IPI to self (logical)\n"); return -1; @@ -894,18 +979,18 @@ static int route_ipi(struct apic_dev_state * apic_dev, // External function, expected to acquire lock on apic static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); addr_t reg_addr = guest_addr - apic->base_addr; struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value); uint32_t val = 0; PrintDebug("apic %u: core %u: at %p: Read apic address space (%p)\n", - apic->lapic_id.val, core->cpu_id, apic, (void *)guest_addr); + apic->lapic_id.val, core->vcpu_id, apic, (void *)guest_addr); if (msr->apic_enable == 0) { PrintError("apic %u: core %u: Read from APIC address space with disabled APIC, apic msr=0x%llx\n", - apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value); + apic->lapic_id.val, core->vcpu_id, apic->base_addr_msr.value); return -1; } @@ -1118,7 +1203,7 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui default: PrintError("apic %u: core %u: Read from Unhandled APIC Register: %x (getting zero)\n", - apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr); + apic->lapic_id.val, core->vcpu_id, (uint32_t)reg_addr); return -1; } @@ -1130,7 +1215,7 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui *val_ptr = *(((uint8_t *)&val) + byte_addr); } else if ((length == 2) && - ((reg_addr & 0x3) == 0x3)) { + ((reg_addr & 0x3) != 0x3)) { uint_t byte_addr = reg_addr & 0x3; uint16_t * val_ptr = (uint16_t *)dst; *val_ptr = *(((uint16_t *)&val) + byte_addr); @@ -1141,12 +1226,12 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui } else { PrintError("apic %u: core %u: Invalid apic read length (%d)\n", - apic->lapic_id.val, core->cpu_id, length); + apic->lapic_id.val, core->vcpu_id, length); return -1; } PrintDebug("apic %u: core %u: Read finished (val=%x)\n", - apic->lapic_id.val, core->cpu_id, *(uint32_t *)dst); + apic->lapic_id.val, core->vcpu_id, *(uint32_t *)dst); return length; } @@ -1157,27 +1242,28 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui */ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); addr_t reg_addr = guest_addr - apic->base_addr; struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value); uint32_t op_val = *(uint32_t *)src; + addr_t flags = 0; PrintDebug("apic %u: core %u: at %p and priv_data is at %p\n", - apic->lapic_id.val, core->cpu_id, apic, priv_data); + apic->lapic_id.val, core->vcpu_id, apic, priv_data); PrintDebug("apic %u: core %u: write to address space (%p) (val=%x)\n", - apic->lapic_id.val, core->cpu_id, (void *)guest_addr, *(uint32_t *)src); + apic->lapic_id.val, core->vcpu_id, (void *)guest_addr, *(uint32_t *)src); if (msr->apic_enable == 0) { PrintError("apic %u: core %u: Write to APIC address space with disabled APIC, apic msr=0x%llx\n", - apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value); + apic->lapic_id.val, core->vcpu_id, apic->base_addr_msr.value); return -1; } if (length != 4) { PrintError("apic %u: core %u: Invalid apic write length (%d)\n", - apic->lapic_id.val, length, core->cpu_id); + apic->lapic_id.val, length, core->vcpu_id); return -1; } @@ -1213,14 +1299,14 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u case EXT_APIC_FEATURE_OFFSET: PrintError("apic %u: core %u: Attempting to write to read only register %p (error)\n", - apic->lapic_id.val, core->cpu_id, (void *)reg_addr); + apic->lapic_id.val, core->vcpu_id, (void *)reg_addr); break; // Data registers case APIC_ID_OFFSET: - V3_Print("apic %u: core %u: my id is being changed to %u\n", - apic->lapic_id.val, core->cpu_id, op_val); + //V3_Print("apic %u: core %u: my id is being changed to %u\n", + // apic->lapic_id.val, core->vcpu_id, op_val); apic->lapic_id.val = op_val; break; @@ -1229,11 +1315,15 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u break; case LDR_OFFSET: PrintDebug("apic %u: core %u: setting log_dst.val to 0x%x\n", - apic->lapic_id.val, core->cpu_id, op_val); + apic->lapic_id.val, core->vcpu_id, op_val); + flags = v3_lock_irqsave(apic_dev->state_lock); apic->log_dst.val = op_val; + v3_unlock_irqrestore(apic_dev->state_lock, flags); break; case DFR_OFFSET: + flags = v3_lock_irqsave(apic_dev->state_lock); apic->dst_fmt.val = op_val; + v3_unlock_irqrestore(apic_dev->state_lock, flags); break; case SPURIOUS_INT_VEC_OFFSET: apic->spurious_int.val = op_val; @@ -1267,6 +1357,8 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u apic->tmr_cur_cnt = op_val; break; case TMR_DIV_CFG_OFFSET: + PrintDebug("apic %u: core %u: setting tmr_div_cfg to 0x%x\n", + apic->lapic_id.val, core->vcpu_id, op_val); apic->tmr_div_cfg.val = op_val; break; @@ -1327,18 +1419,19 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u tmp_icr = apic->int_cmd; // V3_Print("apic %u: core %u: sending cmd 0x%llx to apic %u\n", - // apic->lapic_id.val, core->cpu_id, + // apic->lapic_id.val, core->vcpu_id, // apic->int_cmd.val, apic->int_cmd.dst); if (route_ipi(apic_dev, apic, &tmp_icr) == -1) { PrintError("IPI Routing failure\n"); return -1; } + break; } case INT_CMD_HI_OFFSET: { apic->int_cmd.hi = op_val; - V3_Print("apic %u: core %u: writing command high=0x%x\n", apic->lapic_id.val, core->cpu_id,apic->int_cmd.hi); + V3_Print("apic %u: core %u: writing command high=0x%x\n", apic->lapic_id.val, core->vcpu_id,apic->int_cmd.hi); break; } @@ -1347,12 +1440,12 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u case SEOI_OFFSET: default: PrintError("apic %u: core %u: Write to Unhandled APIC Register: %x (ignored)\n", - apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr); + apic->lapic_id.val, core->vcpu_id, (uint32_t)reg_addr); return -1; } - PrintDebug("apic %u: core %u: Write finished\n", apic->lapic_id.val, core->cpu_id); + PrintDebug("apic %u: core %u: Write finished\n", apic->lapic_id.val, core->vcpu_id); return length; @@ -1365,11 +1458,18 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u static int apic_intr_pending(struct guest_info * core, void * private_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); - int req_irq = get_highest_irr(apic); - int svc_irq = get_highest_isr(apic); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); + int req_irq = 0; + int svc_irq = 0; + + // Activate all queued IRQ entries + drain_irq_entries(apic); + + // Check for newly activated entries + req_irq = get_highest_irr(apic); + svc_irq = get_highest_isr(apic); - // PrintDebug("apic %u: core %u: req_irq=%d, svc_irq=%d\n",apic->lapic_id.val,info->cpu_id,req_irq,svc_irq); + // PrintDebug("apic %u: core %u: req_irq=%d, svc_irq=%d\n",apic->lapic_id.val,info->vcpu_id,req_irq,svc_irq); if ((req_irq >= 0) && (req_irq > svc_irq)) { @@ -1383,7 +1483,7 @@ static int apic_intr_pending(struct guest_info * core, void * private_data) { static int apic_get_intr_number(struct guest_info * core, void * private_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); int req_irq = get_highest_irr(apic); int svc_irq = get_highest_isr(apic); @@ -1422,25 +1522,16 @@ int v3_apic_raise_intr(struct v3_vm_info * vm, uint32_t irq, uint32_t dst, void struct apic_dev_state * apic_dev = (struct apic_dev_state *) (((struct vm_device*)dev_data)->private_data); struct apic_state * apic = &(apic_dev->apics[dst]); - int do_xcall; PrintDebug("apic %u core ?: raising interrupt IRQ %u (dst = %u).\n", apic->lapic_id.val, irq, dst); - do_xcall = activate_apic_irq(apic, irq); + add_apic_irq_entry(apic, irq); - if (do_xcall < 0) { - PrintError("Failed to activate apic irq\n"); - return -1; - } - - if (do_xcall > 0 && (V3_Get_CPU() != dst)) { -#ifdef CONFIG_MULTITHREAD_OS +#ifdef V3_CONFIG_MULTITHREAD_OS + if ((V3_Get_CPU() != dst)) { v3_interrupt_cpu(vm, dst, 0); -#else - V3_ASSERT(0); -#endif - } +#endif return 0; } @@ -1449,7 +1540,7 @@ int v3_apic_raise_intr(struct v3_vm_info * vm, uint32_t irq, uint32_t dst, void static int apic_begin_irq(struct guest_info * core, void * private_data, int irq) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); int major_offset = (irq & ~0x00000007) >> 3; int minor_offset = irq & 0x00000007; uint8_t *req_location = apic->int_req_reg + major_offset; @@ -1464,7 +1555,7 @@ static int apic_begin_irq(struct guest_info * core, void * private_data, int irq } else { // do nothing... //PrintDebug("apic %u: core %u: begin irq for %d ignored since I don't own it\n", - // apic->lapic_id.val, core->cpu_id, irq); + // apic->lapic_id.val, core->vcpu_id, irq); } return 0; @@ -1480,7 +1571,7 @@ static void apic_update_time(struct guest_info * core, uint64_t cpu_cycles, uint64_t cpu_freq, void * priv_data) { struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data); - struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); + struct apic_state * apic = &(apic_dev->apics[core->vcpu_id]); // The 32 bit GCC runtime is a pile of shit #ifdef __V3_64BIT__ @@ -1499,7 +1590,7 @@ static void apic_update_time(struct guest_info * core, if ((apic->tmr_init_cnt == 0) || ( (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_ONESHOT) && (apic->tmr_cur_cnt == 0))) { - //PrintDebug("apic %u: core %u: APIC timer not yet initialized\n",apic->lapic_id.val,info->cpu_id); + //PrintDebug("apic %u: core %u: APIC timer not yet initialized\n",apic->lapic_id.val,info->vcpu_id); return; } @@ -1531,7 +1622,7 @@ static void apic_update_time(struct guest_info * core, break; default: PrintError("apic %u: core %u: Invalid Timer Divider configuration\n", - apic->lapic_id.val, core->cpu_id); + apic->lapic_id.val, core->vcpu_id); return; } @@ -1546,21 +1637,34 @@ static void apic_update_time(struct guest_info * core, // raise irq PrintDebug("apic %u: core %u: Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n", - apic->lapic_id.val, core->cpu_id, + apic->lapic_id.val, core->vcpu_id, apic->tmr_vec_tbl.tmr_mode, apic->tmr_init_cnt, shift_num); if (apic_intr_pending(core, priv_data)) { PrintDebug("apic %u: core %u: Overriding pending IRQ %d\n", - apic->lapic_id.val, core->cpu_id, + apic->lapic_id.val, core->vcpu_id, apic_get_intr_number(core, priv_data)); } if (activate_internal_irq(apic, APIC_TMR_INT) == -1) { PrintError("apic %u: core %u: Could not raise Timer interrupt\n", - apic->lapic_id.val, core->cpu_id); + apic->lapic_id.val, core->vcpu_id); } if (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_PERIODIC) { + static unsigned int nexits = 0; + static unsigned int missed_ints = 0; + + nexits++; + missed_ints += tmr_ticks / apic->tmr_init_cnt; + + if ((missed_ints > 0) && (nexits >= 5000)) { + V3_Print("apic %u: core %u: missed %u timer interrupts total in last %u exits.\n", + apic->lapic_id.val, core->vcpu_id, missed_ints, nexits); + missed_ints = 0; + nexits = 0; + } + tmr_ticks = tmr_ticks % apic->tmr_init_cnt; apic->tmr_cur_cnt = apic->tmr_init_cnt - tmr_ticks; } @@ -1610,9 +1714,109 @@ static int apic_free(struct apic_dev_state * apic_dev) { return 0; } +#ifdef V3_CONFIG_CHECKPOINT +static int apic_save(struct v3_chkpt_ctx * ctx, void * private_data) { + struct apic_dev_state * apic_state = (struct apic_dev_state *)private_data; + int i = 0; + + V3_CHKPT_STD_SAVE(ctx, apic_state->num_apics); + + //V3_CHKPT_STD_SAVE(ctx,apic_state->state_lock); + for (i = 0; i < apic_state->num_apics; i++) { + + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].base_addr); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].base_addr_msr); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].lapic_id); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].apic_ver); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].ext_apic_ctrl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].local_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].tmr_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].tmr_div_cfg); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].lint0_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].lint1_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].perf_ctr_loc_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].therm_loc_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].err_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].err_status); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].spurious_int); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].int_cmd); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].log_dst); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].dst_fmt); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].arb_prio); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].task_prio); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].proc_prio); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].ext_apic_feature); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].spec_eoi); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].tmr_cur_cnt); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].tmr_init_cnt); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].ext_intr_vec_tbl); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].rem_rd_data); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].ipi_state); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].int_req_reg); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].int_svc_reg); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].int_en_reg); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].trig_mode_reg); + V3_CHKPT_STD_SAVE(ctx, apic_state->apics[i].eoi); + + } + + return 0; +} + +static int apic_load(struct v3_chkpt_ctx * ctx, void * private_data) { + struct apic_dev_state *apic_state = (struct apic_dev_state *)private_data; + int i = 0; + + V3_CHKPT_STD_LOAD(ctx,apic_state->num_apics); + + for (i = 0; i < apic_state->num_apics; i++) { + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].base_addr); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].base_addr_msr); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].lapic_id); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].apic_ver); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].ext_apic_ctrl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].local_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].tmr_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].tmr_div_cfg); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].lint0_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].lint1_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].perf_ctr_loc_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].therm_loc_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].err_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].err_status); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].spurious_int); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].int_cmd); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].log_dst); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].dst_fmt); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].arb_prio); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].task_prio); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].proc_prio); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].ext_apic_feature); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].spec_eoi); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].tmr_cur_cnt); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].tmr_init_cnt); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].ext_intr_vec_tbl); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].rem_rd_data); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].ipi_state); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].int_req_reg); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].int_svc_reg); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].int_en_reg); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].trig_mode_reg); + V3_CHKPT_STD_LOAD(ctx, apic_state->apics[i].eoi); + } + + + return 0; +} + +#endif static struct v3_device_ops dev_ops = { .free = (int (*)(void *))apic_free, +#ifdef V3_CONFIG_CHECKPOINT + .save = apic_save, + .load = apic_load +#endif }; @@ -1628,6 +1832,7 @@ static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { sizeof(struct apic_state) * vm->num_cores); apic_dev->num_apics = vm->num_cores; + v3_lock_init(&(apic_dev->state_lock)); struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, apic_dev); @@ -1656,12 +1861,12 @@ static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { return -1; } - v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev); + v3_hook_full_mem(vm, core->vcpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev); PrintDebug("apic %u: (setup device): done, my id is %u\n", i, apic->lapic_id.val); } -#ifdef CONFIG_DEBUG_APIC +#ifdef V3_CONFIG_DEBUG_APIC for (i = 0; i < vm->num_cores; i++) { struct apic_state * apic = &(apic_dev->apics[i]); PrintDebug("apic: sanity check: apic %u (at %p) has id %u and msr value %llx and core at %p\n",