2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <devices/apic.h>
22 #include <devices/apic_regs.h>
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_msr.h>
25 #include <palacios/vmm_sprintf.h>
26 #include <palacios/vm_guest.h>
27 #include <palacios/vmm_types.h>
// When CONFIG_DEBUG_APIC is not defined, PrintDebug() is given an empty
// expansion, so the debug statements in this file generate no code.
30 #ifndef CONFIG_DEBUG_APIC
32 #define PrintDebug(fmt, args...)
// Source of an internally generated interrupt. activate_internal_irq() uses
// this value to pick which local vector table entry to consult.
36 typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT,
37 APIC_LINT0_INT, APIC_LINT1_INT, APIC_ERR_INT } apic_irq_type_t;
// Delivery mode encodings. activate_internal_irq() compares the delivery
// mode of a local vector table entry against APIC_FIXED_DELIVERY.
39 #define APIC_FIXED_DELIVERY 0x0
40 #define APIC_SMI_DELIVERY 0x2
41 #define APIC_NMI_DELIVERY 0x4
42 #define APIC_INIT_DELIVERY 0x5
43 #define APIC_EXTINT_DELIVERY 0x7
// MSR number hooked by apic_init() for the APIC base register, and the
// guest physical address each APIC is initially mapped at.
46 #define BASE_ADDR_MSR 0x0000001B
47 #define DEFAULT_BASE_ADDR 0xfee00000
// Register offsets, in bytes, from the APIC base address. apic_read() and
// apic_write() compute (guest_addr - apic->base_addr) and switch on it using
// these constants.
49 #define APIC_ID_OFFSET 0x020
50 #define APIC_VERSION_OFFSET 0x030
51 #define TPR_OFFSET 0x080
52 #define APR_OFFSET 0x090
53 #define PPR_OFFSET 0x0a0
54 #define EOI_OFFSET 0x0b0
55 #define REMOTE_READ_OFFSET 0x0c0
56 #define LDR_OFFSET 0x0d0
57 #define DFR_OFFSET 0x0e0
58 #define SPURIOUS_INT_VEC_OFFSET 0x0f0
60 #define ISR_OFFSET0 0x100 // 0x100 - 0x170
61 #define ISR_OFFSET1 0x110 // 0x100 - 0x170
62 #define ISR_OFFSET2 0x120 // 0x100 - 0x170
63 #define ISR_OFFSET3 0x130 // 0x100 - 0x170
64 #define ISR_OFFSET4 0x140 // 0x100 - 0x170
65 #define ISR_OFFSET5 0x150 // 0x100 - 0x170
66 #define ISR_OFFSET6 0x160 // 0x100 - 0x170
67 #define ISR_OFFSET7 0x170 // 0x100 - 0x170
69 #define TRIG_OFFSET0 0x180 // 0x180 - 0x1f0
70 #define TRIG_OFFSET1 0x190 // 0x180 - 0x1f0
71 #define TRIG_OFFSET2 0x1a0 // 0x180 - 0x1f0
72 #define TRIG_OFFSET3 0x1b0 // 0x180 - 0x1f0
73 #define TRIG_OFFSET4 0x1c0 // 0x180 - 0x1f0
74 #define TRIG_OFFSET5 0x1d0 // 0x180 - 0x1f0
75 #define TRIG_OFFSET6 0x1e0 // 0x180 - 0x1f0
76 #define TRIG_OFFSET7 0x1f0 // 0x180 - 0x1f0
79 #define IRR_OFFSET0 0x200 // 0x200 - 0x270
80 #define IRR_OFFSET1 0x210 // 0x200 - 0x270
81 #define IRR_OFFSET2 0x220 // 0x200 - 0x270
82 #define IRR_OFFSET3 0x230 // 0x200 - 0x270
83 #define IRR_OFFSET4 0x240 // 0x200 - 0x270
84 #define IRR_OFFSET5 0x250 // 0x200 - 0x270
85 #define IRR_OFFSET6 0x260 // 0x200 - 0x270
86 #define IRR_OFFSET7 0x270 // 0x200 - 0x270
89 #define ESR_OFFSET 0x280
90 #define INT_CMD_LO_OFFSET 0x300
91 #define INT_CMD_HI_OFFSET 0x310
92 #define TMR_LOC_VEC_TBL_OFFSET 0x320
93 #define THERM_LOC_VEC_TBL_OFFSET 0x330
94 #define PERF_CTR_LOC_VEC_TBL_OFFSET 0x340
95 #define LINT0_VEC_TBL_OFFSET 0x350
96 #define LINT1_VEC_TBL_OFFSET 0x360
97 #define ERR_VEC_TBL_OFFSET 0x370
98 #define TMR_INIT_CNT_OFFSET 0x380
99 #define TMR_CUR_CNT_OFFSET 0x390
100 #define TMR_DIV_CFG_OFFSET 0x3e0
101 #define EXT_APIC_FEATURE_OFFSET 0x400
102 #define EXT_APIC_CMD_OFFSET 0x410
103 #define SEOI_OFFSET 0x420
105 #define IER_OFFSET0 0x480 // 0x480 - 0x4f0
106 #define IER_OFFSET1 0x490 // 0x480 - 0x4f0
107 #define IER_OFFSET2 0x4a0 // 0x480 - 0x4f0
108 #define IER_OFFSET3 0x4b0 // 0x480 - 0x4f0
109 #define IER_OFFSET4 0x4c0 // 0x480 - 0x4f0
110 #define IER_OFFSET5 0x4d0 // 0x480 - 0x4f0
111 #define IER_OFFSET6 0x4e0 // 0x480 - 0x4f0
112 #define IER_OFFSET7 0x4f0 // 0x480 - 0x4f0
114 #define EXT_INT_LOC_VEC_TBL_OFFSET0 0x500 // 0x500 - 0x530
115 #define EXT_INT_LOC_VEC_TBL_OFFSET1 0x510 // 0x500 - 0x530
116 #define EXT_INT_LOC_VEC_TBL_OFFSET2 0x520 // 0x500 - 0x530
117 #define EXT_INT_LOC_VEC_TBL_OFFSET3 0x530 // 0x500 - 0x530
// Bit fields of the APIC base MSR layout (used through struct apic_msr).
// Single-bit flag; NOTE(review): presumably marks the bootstrap processor - confirm
128 uint8_t bootstrap_cpu : 1;
// apic_read()/apic_write() report an error when this bit is 0
130 uint8_t apic_enable : 1;
// 40-bit base address field
131 uint64_t base_addr : 40;
133 } __attribute__((packed));
134 } __attribute__((packed));
135 } __attribute__((packed));
// Per-APIC startup handshake state. deliver_ipi() moves an APIC to SIPI when
// an INIT IPI arrives and only accepts a startup IPI while it is in SIPI.
139 typedef enum {INIT, SIPI, STARTED} ipi_state_t;
// Forward declaration; the full definition appears further down the file.
141 struct apic_dev_state;
// Image of the APIC base MSR; init_apic_state() fills in .value and
// apic_read()/apic_write() test its apic_enable bit.
147 struct apic_msr base_addr_msr;
150 /* memory map registers */
152 struct lapic_id_reg lapic_id;
153 struct apic_ver_reg apic_ver;
154 struct ext_apic_ctrl_reg ext_apic_ctrl;
155 struct local_vec_tbl_reg local_vec_tbl;
156 struct tmr_vec_tbl_reg tmr_vec_tbl;
157 struct tmr_div_cfg_reg tmr_div_cfg;
158 struct lint_vec_tbl_reg lint0_vec_tbl;
159 struct lint_vec_tbl_reg lint1_vec_tbl;
160 struct perf_ctr_loc_vec_tbl_reg perf_ctr_loc_vec_tbl;
161 struct therm_loc_vec_tbl_reg therm_loc_vec_tbl;
162 struct err_vec_tbl_reg err_vec_tbl;
163 struct err_status_reg err_status;
164 struct spurious_int_reg spurious_int;
165 struct int_cmd_reg int_cmd;
166 struct log_dst_reg log_dst;
167 struct dst_fmt_reg dst_fmt;
168 struct arb_prio_reg arb_prio;
169 struct task_prio_reg task_prio;
170 struct proc_prio_reg proc_prio;
171 struct ext_apic_feature_reg ext_apic_feature;
172 struct spec_eoi_reg spec_eoi;
// Timer current and initial counts, advanced by apic_update_time()
175 uint32_t tmr_cur_cnt;
176 uint32_t tmr_init_cnt;
// Entries read and written at EXT_INT_LOC_VEC_TBL_OFFSET0..3
179 struct local_vec_tbl_reg ext_intr_vec_tbl[4];
// Value returned for reads of REMOTE_READ_OFFSET
181 uint32_t rem_rd_data;
// Position in the INIT/SIPI startup sequence (see deliver_ipi())
184 ipi_state_t ipi_state;
// 256-bit bitmaps with one bit per vector: requested, in service, enabled
// and trigger mode. Vector n is bit (n & 7) of byte (n >> 3).
186 uint8_t int_req_reg[32];
187 uint8_t int_svc_reg[32];
188 uint8_t int_en_reg[32];
189 uint8_t trig_mode_reg[32];
// Core this APIC belongs to; deliver_ipi() uses it to find the target core
191 struct guest_info * core;
// Device-wide state: a header followed by one struct apic_state per core.
// apic_init() allocates sizeof(struct apic_dev_state) plus
// num_cores * sizeof(struct apic_state) so that apics[] can be indexed by
// core id.
201 struct apic_dev_state {
// C99 flexible array member (replaces the GNU zero-length array idiom);
// sizeof(struct apic_dev_state) still excludes the trailing elements.
204 struct apic_state apics[];
205 } __attribute__((packed));
// Forward declarations: write_apic_msr() passes these handlers to
// v3_hook_full_mem() before their definitions appear.
209 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data);
210 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data);
/*
 * Put one APIC into its initial state.
 *
 * apic: state structure to initialize
 * id:   value stored in the APIC ID register; also printed in debug output
 */
212 static void init_apic_state(struct apic_state * apic, uint32_t id) {
213 apic->base_addr = DEFAULT_BASE_ADDR;
216 // boot processor, enabled
217 apic->base_addr_msr.value = 0x0000000000000900LL;
219 // ap processor, enabled
220 apic->base_addr_msr.value = 0x0000000000000800LL;
223 // same base address regardless of ap or main
224 apic->base_addr_msr.value |= ((uint64_t)DEFAULT_BASE_ADDR);
226 PrintDebug("apic %u: (init_apic_state): msr=0x%llx\n",id, apic->base_addr_msr.value);
228 PrintDebug("apic %u: (init_apic_state): Sizeof Interrupt Request Register %d, should be 32\n",
229 id, (uint_t)sizeof(apic->int_req_reg));
// Nothing requested or in service, every vector enabled, all trigger bits 0
231 memset(apic->int_req_reg, 0, sizeof(apic->int_req_reg));
232 memset(apic->int_svc_reg, 0, sizeof(apic->int_svc_reg));
233 memset(apic->int_en_reg, 0xff, sizeof(apic->int_en_reg));
234 memset(apic->trig_mode_reg, 0, sizeof(apic->trig_mode_reg));
236 apic->eoi = 0x00000000;
237 apic->rem_rd_data = 0x00000000;
238 apic->tmr_init_cnt = 0x00000000;
239 apic->tmr_cur_cnt = 0x00000000;
241 apic->lapic_id.val = id;
// Each APIC starts out waiting for an INIT IPI (see deliver_ipi())
243 apic->ipi_state = INIT;
245 // The P6 has 6 LVT entries, so we set the value to (6-1)...
246 apic->apic_ver.val = 0x80050010;
248 apic->task_prio.val = 0x00000000;
249 apic->arb_prio.val = 0x00000000;
250 apic->proc_prio.val = 0x00000000;
251 apic->log_dst.val = 0x00000000;
252 apic->dst_fmt.val = 0xffffffff;
253 apic->spurious_int.val = 0x000000ff;
254 apic->err_status.val = 0x00000000;
255 apic->int_cmd.val = 0x0000000000000000LL;
// All local vector table entries start as 0x00010000
// NOTE(review): bit 16 is presumably the mask bit - confirm against apic_regs.h
256 apic->tmr_vec_tbl.val = 0x00010000;
257 apic->therm_loc_vec_tbl.val = 0x00010000;
258 apic->perf_ctr_loc_vec_tbl.val = 0x00010000;
259 apic->lint0_vec_tbl.val = 0x00010000;
260 apic->lint1_vec_tbl.val = 0x00010000;
261 apic->err_vec_tbl.val = 0x00010000;
262 apic->tmr_div_cfg.val = 0x00000000;
263 //apic->ext_apic_feature.val = 0x00000007;
264 apic->ext_apic_feature.val = 0x00040007;
265 apic->ext_apic_ctrl.val = 0x00000000;
266 apic->spec_eoi.val = 0x00000000;
268 v3_lock_init(&(apic->lock));
/*
 * MSR read hook registered for BASE_ADDR_MSR by apic_init().
 * Stores the calling core's apic->base_addr into dst->value.
 *
 * NOTE(review): the value returned is base_addr, not base_addr_msr.value, so
 * the flag bits that init_apic_state() puts into base_addr_msr (0x900/0x800)
 * are not reported to the guest - confirm this is intended.
 */
274 static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) {
275 struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
// One APIC per core, indexed by the reading core's id
276 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
278 PrintDebug("apic %u: core %u: MSR read\n", apic->lapic_id.val, core->cpu_id);
280 dst->value = apic->base_addr;
281 v3_unlock(apic->lock);
/*
 * MSR write hook registered for BASE_ADDR_MSR by apic_init().
 * Looks up and deletes the memory region at the current APIC base address,
 * stores src.value as the new base address, and hooks a 4KB region there
 * with apic_read()/apic_write().
 *
 * NOTE(review): src.value is stored into base_addr unmasked, and
 * base_addr_msr is not updated here - confirm that any flag bits in the
 * written value are handled as intended.
 */
286 static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) {
287 struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
288 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
// Region currently backing the APIC page for this core
289 struct v3_mem_region * old_reg = v3_get_mem_region(core->vm_info, core->cpu_id, apic->base_addr);
292 PrintDebug("apic %u: core %u: MSR write\n", apic->lapic_id.val, core->cpu_id);
294 if (old_reg == NULL) {
296 PrintError("apic %u: core %u: APIC Base address region does not exit...\n",
297 apic->lapic_id.val, core->cpu_id);
// Drop the old mapping before installing the hook at the new address
303 v3_delete_mem_region(core->vm_info, old_reg);
305 apic->base_addr = src.value;
307 if (v3_hook_full_mem(core->vm_info, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev) == -1) {
308 PrintError("apic %u: core %u: Could not hook new APIC Base address\n",
309 apic->lapic_id.val, core->cpu_id);
// The lock is released on the failure path as well as on the normal path below
310 v3_unlock(apic->lock);
314 v3_unlock(apic->lock);
319 // irq_num is the bit offset into a 256 bit buffer...
/*
 * Mark vector irq_num as requested in apic->int_req_reg, provided the
 * matching bit in apic->int_en_reg is set. If the enable bit is clear, only
 * a debug message is printed.
 */
320 static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) {
// Byte index and bit index of the vector within the 32-byte bitmaps
321 int major_offset = (irq_num & ~0x00000007) >> 3;
322 int minor_offset = irq_num & 0x00000007;
323 uint8_t * req_location = apic->int_req_reg + major_offset;
324 uint8_t * en_location = apic->int_en_reg + major_offset;
325 uint8_t flag = 0x1 << minor_offset;
330 // PrintError("apic %u: core ?: Attempting to raise an invalid interrupt: %d\n", apic->lapic_id.val,irq_num);
335 PrintDebug("apic %u: core ?: Raising APIC IRQ %d\n", apic->lapic_id.val, irq_num);
// Request bit already set: this request merges with the pending one
337 if (*req_location & flag) {
338 //V3_Print("Interrupts coallescing\n");
341 if (*en_location & flag) {
342 *req_location |= flag;
344 PrintDebug("apic %u: core ?: Interrupt not enabled... %.2x\n",
345 apic->lapic_id.val, *en_location);
/*
 * Scan apic->int_svc_reg from the top byte and top bit downwards and return
 * the number (byte * 8 + bit) of the first set bit found, i.e. the highest
 * in-service vector.
 * NOTE(review): callers compare the result against -1, so -1 is presumably
 * the value returned when no bit is set - confirm.
 */
354 static int get_highest_isr(struct apic_state * apic) {
357 // We iterate backwards to find the highest priority
358 for (i = 31; i >= 0; i--) {
359 uint8_t * svc_major = apic->int_svc_reg + i;
// Skip bytes with no bits set
361 if ((*svc_major) & 0xff) {
362 for (j = 7; j >= 0; j--) {
363 uint8_t flag = 0x1 << j;
364 if ((*svc_major) & flag) {
365 return ((i * 8) + j);
/*
 * Scan apic->int_req_reg from the top byte and top bit downwards and return
 * the number (byte * 8 + bit) of the first set bit found, i.e. the highest
 * requested vector.
 * NOTE(review): apic_intr_pending() tests the result with >= 0, so a negative
 * value is presumably returned when no bit is set - confirm.
 */
376 static int get_highest_irr(struct apic_state * apic) {
379 // We iterate backwards to find the highest priority
380 for (i = 31; i >= 0; i--) {
381 uint8_t * req_major = apic->int_req_reg + i;
// Skip bytes with no bits set
383 if ((*req_major) & 0xff) {
384 for (j = 7; j >= 0; j--) {
385 uint8_t flag = 0x1 << j;
386 if ((*req_major) & flag) {
387 return ((i * 8) + j);
/*
 * Handle an end-of-interrupt: find the highest vector marked in service and
 * clear its bit in apic->int_svc_reg.
 */
399 static int apic_do_eoi(struct apic_state * apic) {
400 int isr_irq = get_highest_isr(apic);
// Byte index and bit index of that vector within int_svc_reg
403 int major_offset = (isr_irq & ~0x00000007) >> 3;
404 int minor_offset = isr_irq & 0x00000007;
405 uint8_t flag = 0x1 << minor_offset;
406 uint8_t * svc_location = apic->int_svc_reg + major_offset;
408 PrintDebug("apic %u: core ?: Received APIC EOI for IRQ %d\n", apic->lapic_id.val,isr_irq);
410 *svc_location &= ~flag;
// Extra handling for specific vectors, compiled only with CONFIG_CRAY_XT
412 #ifdef CONFIG_CRAY_XT
414 if ((isr_irq == 238) ||
416 PrintDebug("apic %u: core ?: Acking IRQ %d\n", apic->lapic_id.val,isr_irq);
419 if (isr_irq == 238) {
424 //PrintError("apic %u: core ?: Spurious EOI...\n",apic->lapic_id.val);
/*
 * Raise an interrupt that originates inside the APIC (timer, thermal,
 * performance counter, LINT0, LINT1 or error).
 *
 * The vector, delivery mode and mask are taken from the local vector table
 * entry for int_type. Only APIC_FIXED_DELIVERY is forwarded to
 * activate_apic_irq(); any other delivery mode is reported as unhandled.
 */
431 static int activate_internal_irq(struct apic_state * apic, apic_irq_type_t int_type) {
432 uint32_t vec_num = 0;
433 uint32_t del_mode = 0;
// Timer entry: delivery mode is always treated as fixed
439 vec_num = apic->tmr_vec_tbl.vec;
440 del_mode = APIC_FIXED_DELIVERY;
441 masked = apic->tmr_vec_tbl.mask;
// Thermal entry: delivery mode comes from the entry's msg_type field
444 vec_num = apic->therm_loc_vec_tbl.vec;
445 del_mode = apic->therm_loc_vec_tbl.msg_type;
446 masked = apic->therm_loc_vec_tbl.mask;
// Performance counter entry
449 vec_num = apic->perf_ctr_loc_vec_tbl.vec;
450 del_mode = apic->perf_ctr_loc_vec_tbl.msg_type;
451 masked = apic->perf_ctr_loc_vec_tbl.mask;
// LINT0 entry
454 vec_num = apic->lint0_vec_tbl.vec;
455 del_mode = apic->lint0_vec_tbl.msg_type;
456 masked = apic->lint0_vec_tbl.mask;
// LINT1 entry
459 vec_num = apic->lint1_vec_tbl.vec;
460 del_mode = apic->lint1_vec_tbl.msg_type;
461 masked = apic->lint1_vec_tbl.mask;
// Error entry: delivery mode is always treated as fixed
464 vec_num = apic->err_vec_tbl.vec;
465 del_mode = APIC_FIXED_DELIVERY;
466 masked = apic->err_vec_tbl.mask;
469 PrintError("apic %u: core ?: Invalid APIC interrupt type\n", apic->lapic_id.val);
473 // interrupt is masked, don't send
475 PrintDebug("apic %u: core ?: Inerrupt is masked\n", apic->lapic_id.val);
479 if (del_mode == APIC_FIXED_DELIVERY) {
480 //PrintDebug("Activating internal APIC IRQ %d\n", vec_num);
481 return activate_apic_irq(apic, vec_num);
483 PrintError("apic %u: core ?: Unhandled Delivery Mode\n", apic->lapic_id.val);
/*
 * Cluster-model logical destination check.
 *
 * The destination accepts the IPI when the upper nibble of mda equals the
 * upper nibble of its logical destination id and the lower nibbles share at
 * least one set bit.
 */
490 static inline int should_deliver_cluster_ipi(struct guest_info * dst_core,
491 struct apic_state * dst_apic, uint8_t mda) {
493 if ( ((mda & 0xf0) == (dst_apic->log_dst.dst_log_id & 0xf0)) && // (I am in the cluster and
494 ((mda & 0x0f) & (dst_apic->log_dst.dst_log_id & 0x0f)) ) { // I am in the set)
496 PrintDebug("apic %u core %u: accepting clustered IRQ (mda 0x%x == log_dst 0x%x)\n",
497 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
498 dst_apic->log_dst.dst_log_id);
// A stray "dst_" token was removed from the argument list below; it was a
// syntax error whenever PrintDebug was compiled in.
502 PrintDebug("apic %u core %u: rejecting clustered IRQ (mda 0x%x != log_dst 0x%x)\n",
503 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
504 dst_apic->log_dst.dst_log_id);
/*
 * Flat-model logical destination check: the destination accepts the IPI when
 * its logical destination id and mda share at least one set bit.
 */
509 static inline int should_deliver_flat_ipi(struct guest_info * dst_core,
510 struct apic_state * dst_apic, uint8_t mda) {
512 if (dst_apic->log_dst.dst_log_id & mda) { // I am in the set
514 PrintDebug("apic %u core %u: accepting flat IRQ (mda 0x%x == log_dst 0x%x)\n",
515 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
516 dst_apic->log_dst.dst_log_id);
519 PrintDebug("apic %u core %u: rejecting flat IRQ (mda 0x%x != log_dst 0x%x)\n",
520 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
521 dst_apic->log_dst.dst_log_id);
/*
 * Decide whether a logically addressed IPI with message destination address
 * mda should be delivered to dst_apic. Dispatches on the destination format
 * register: model 0xf uses the cluster check, model 0x0 the flat check, and
 * any other value is reported as an error.
 * route_ipi() treats a result of 1 as "deliver" and -1 as an error.
 */
528 static int should_deliver_ipi(struct guest_info * dst_core,
529 struct apic_state * dst_apic, uint8_t mda) {
532 // always deliver broadcast
536 if (dst_apic->dst_fmt.model == 0xf) {
537 return should_deliver_cluster_ipi(dst_core, dst_apic, mda);
538 } else if (dst_apic->dst_fmt.model == 0x0) {
539 return should_deliver_flat_ipi(dst_core, dst_apic, mda);
541 PrintError("apic %u core %u: invalid destination format register value 0x%x for logical mode delivery.\n",
542 dst_apic->lapic_id.val, dst_core->cpu_id, dst_apic->dst_fmt.model);
/*
 * Deliver one IPI from src_apic to dst_apic.
 *
 * core:     core on which the IPI is being sent
 * vector:   interrupt vector, or the startup page number for a startup IPI
 * del_mode: delivery mode taken from the interrupt command register
 *
 * Ordinary interrupts are marked pending on the destination, and a remote
 * destination core is interrupted so that it notices. INIT moves the
 * destination APIC to the SIPI state. A startup IPI loads the destination
 * core's CS from the vector and marks the core running. Other delivery modes
 * are reported as unsupported.
 */
548 static int deliver_ipi(struct guest_info * core,
549 struct apic_state * src_apic,
550 struct apic_state * dst_apic,
551 uint32_t vector, uint8_t del_mode) {
553 struct guest_info * dst_core = dst_apic->core;
558 case 1: // lowest priority
559 PrintDebug("icc_bus: delivering IRQ to core %u\n", dst_core->cpu_id);
561 activate_apic_irq(dst_apic, vector);
563 if (dst_apic != src_apic) {
564 // Assume core # is same as logical processor for now
565 // TODO FIX THIS FIX THIS
566 // THERE SHOULD BE: guestapicid->virtualapicid map,
567 // cpu_id->logical processor map
568 // host maitains logical proc->phsysical proc
569 PrintDebug("icc_bus: non-local core, forcing it to exit\n");
571 v3_interrupt_cpu(core->vm_info, dst_core->cpu_id, 0);
577 PrintDebug("icc_bus: INIT delivery to core %u\n", dst_core->cpu_id);
579 // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
582 if (dst_apic->ipi_state != INIT) {
583 PrintError("icc_bus: Warning: core %u is not in INIT state (mode = %d), ignored\n",
584 dst_core->cpu_id, dst_core->cpu_mode);
585 // Only a warning, since INIT INIT SIPI is common
589 // We transition the target core to SIPI state
590 dst_apic->ipi_state = SIPI; // note: locking should not be needed here
592 // That should be it since the target core should be
593 // waiting in host on this transition
594 // either it's on another core or on a different preemptive thread
595 // in both cases, it will quickly notice this transition
596 // in particular, we should not need to force an exit here
598 PrintDebug("icc_bus: INIT delivery done\n");
605 if (dst_apic->ipi_state != SIPI) {
606 PrintError("icc_bus: core %u is not in SIPI state (mode = %d), ignored!\n",
607 dst_core->cpu_id, dst_core->cpu_mode);
611 // Write the RIP, CS, and descriptor
612 // assume the rest is already good to go
614 // vector VV -> rip at 0
616 // This means we start executing at linear address VV000
618 // So the selector needs to be VV00
619 // and the base needs to be VV000
622 dst_core->segments.cs.selector = vector << 8;
623 dst_core->segments.cs.limit = 0xffff;
624 dst_core->segments.cs.base = vector << 12;
// The debug statement now prints the "vector" parameter; it used to name an
// undefined identifier and failed to build with CONFIG_DEBUG_APIC.
626 PrintDebug("icc_bus: SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n",
627 vector, dst_core->segments.cs.selector, dst_core->cpu_id);
628 // Maybe need to adjust the APIC?
630 // Mark the target core as running
// NOTE(review): ipi_state is left at SIPI here, so a repeated startup IPI
// passes the state check above again - confirm whether it should advance to
// STARTED.
631 dst_core->core_run_state = CORE_RUNNING; // note: locking should not be needed here
633 // As with INIT, we should not need to do anything else
635 PrintDebug("icc_bus: SIPI delivery done\n");
644 PrintError("IPI %d delivery is unsupported\n", del_mode);
/*
 * Route an IPI described by an interrupt command register value.
 *
 * core:     core performing the send
 * apic_dev: device state holding every APIC
 * src_apic: APIC that issued the command
 * icr_val:  full 64-bit interrupt command register value. It is overlaid
 *           with struct int_cmd_reg, so it must be as wide as the register;
 *           a 32-bit parameter would truncate the caller's int_cmd.val and
 *           let the overlay read past the parameter.
 *
 * With no destination shorthand, the IPI goes either to the APIC whose index
 * equals the destination field (destination mode 0) or to every APIC
 * accepted by should_deliver_ipi(). The shorthand cases deliver to the
 * sender itself, or to all APICs with or without the sender.
 */
653 static int route_ipi(struct guest_info * core, struct apic_dev_state * apic_dev,
654 struct apic_state * src_apic, uint64_t icr_val) {
655 struct int_cmd_reg * icr = (struct int_cmd_reg *)&icr_val;
656 struct apic_state * dest_apic = NULL;
// Prints only values that exist in this function
658 PrintDebug("icc_bus: apic_dev=%p, src_apic=%p, icr_data=%llx\n",
659 apic_dev, src_apic, (unsigned long long)icr_val);
662 // initial sanity checks
663 if (src_apic == NULL) {
// src_apic is NULL on this path, so report the calling core instead of
// dereferencing it
664 PrintError("icc_bus: Apparently sending from unregistered apic id=%d\n",
665 core->cpu_id);
// In destination mode 0 the destination field is used as an index into apics[]
670 if ((icr->dst_mode == 0) && (icr->dst >= apic_dev->num_apics)) {
671 PrintError("icc_bus: Attempted send to unregistered apic id=%u\n",
676 dest_apic = &(apic_dev->apics[icr->dst]);
// NOTE(review): the debug statement below references deliverymode_str, state
// and shorthand_str, none of which are defined in the visible part of this
// file - confirm it builds with CONFIG_DEBUG_APIC.
679 PrintDebug("icc_bus: IPI %s %u from %s %u to %s %s %u (icr=0x%llx, extirq=%u)\n",
680 deliverymode_str[icr->del_mode],
682 (src_apic == state->ioapic_id) ? "ioapic" : "apic",
684 (icr->dst_mode == 0) ? "(physical)" : "(logical)",
685 shorthand_str[icr->dst_shorthand],
691 switch (icr->dst_shorthand) {
693 case 0: // no shorthand
694 if (icr->dst_mode == 0) {
697 if (deliver_ipi(core, src_apic, dest_apic,
698 icr->vec, icr->del_mode) == -1) {
699 PrintError("Error: Could not deliver IPI\n");
// Otherwise the destination field is a message destination address that is
// matched against every APIC
706 uint8_t mda = icr->dst;
708 for (i = 0; i < apic_dev->num_apics; i++) {
709 dest_apic = &(apic_dev->apics[i]);
710 int del_flag = should_deliver_ipi(dest_apic->core, dest_apic, mda);
712 if (del_flag == -1) {
713 PrintError("Error checking delivery mode\n");
715 } else if (del_flag == 1) {
716 if (deliver_ipi(core, src_apic, dest_apic,
717 icr->vec, icr->del_mode) == -1) {
718 PrintError("Error: Could not deliver IPI\n");
// Delivery to the sender itself
729 if (icr->dst_mode == 0) {
730 if (deliver_ipi(core, src_apic, src_apic, icr->vec, icr->del_mode) == -1) {
731 PrintError("Could not deliver IPI\n");
736 PrintError("icc_bus: use of logical delivery in self is not yet supported.\n");
742 case 3: { // all and all-but-me
743 // assuming that logical verus physical doesn't matter
744 // although it is odd that both are used
747 for (i = 0; i < apic_dev->num_apics; i++) {
748 dest_apic = &(apic_dev->apics[i]);
// The sender is skipped unless the shorthand value is 2
750 if ((dest_apic != src_apic) || (icr->dst_shorthand == 2)) {
751 if (deliver_ipi(core, src_apic, dest_apic, icr->vec, icr->del_mode) == -1) {
752 PrintError("Error: Could not deliver IPI\n");
761 PrintError("Error routing IPI, invalid Mode (%d)\n", icr->dst_shorthand);
/*
 * Memory hook for guest reads of the APIC register page.
 *
 * core:       core performing the access; selects which APIC is read
 * guest_addr: guest physical address of the access
 * dst:        buffer that receives length bytes
 * length:     access size in bytes (1, 2 or 4)
 * priv_data:  the struct apic_dev_state registered with the hook
 *
 * The register is selected by the offset from apic->base_addr with the low
 * two bits masked off. Its 32-bit value is fetched into val, and the
 * requested bytes of val are then copied to dst.
 */
771 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data) {
772 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
773 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
774 addr_t reg_addr = guest_addr - apic->base_addr;
775 struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
779 PrintDebug("apic %u: core %u: at %p: Read apic address space (%p)\n",
780 apic->lapic_id.val, core->cpu_id, apic, (void *)guest_addr);
782 if (msr->apic_enable == 0) {
783 PrintError("apic %u: core %u: Read from APIC address space with disabled APIC, apic msr=0x%llx\n",
784 apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value);
790 /* Because "May not be supported" doesn't matter to Linux developers... */
791 /* if (length != 4) { */
792 /* PrintError("Invalid apic read length (%d)\n", length); */
796 switch (reg_addr & ~0x3) {
798 // Well, only an idiot would read from a architectural write only register
800 // PrintError("Attempting to read from write only register\n");
806 val = apic->lapic_id.val;
808 case APIC_VERSION_OFFSET:
809 val = apic->apic_ver.val;
812 val = apic->task_prio.val;
815 val = apic->arb_prio.val;
818 val = apic->proc_prio.val;
820 case REMOTE_READ_OFFSET:
821 val = apic->rem_rd_data;
824 val = apic->log_dst.val;
827 val = apic->dst_fmt.val;
829 case SPURIOUS_INT_VEC_OFFSET:
830 val = apic->spurious_int.val;
833 val = apic->err_status.val;
835 case TMR_LOC_VEC_TBL_OFFSET:
836 val = apic->tmr_vec_tbl.val;
838 case LINT0_VEC_TBL_OFFSET:
839 val = apic->lint0_vec_tbl.val;
841 case LINT1_VEC_TBL_OFFSET:
842 val = apic->lint1_vec_tbl.val;
844 case ERR_VEC_TBL_OFFSET:
845 val = apic->err_vec_tbl.val;
847 case TMR_INIT_CNT_OFFSET:
848 val = apic->tmr_init_cnt;
850 case TMR_DIV_CFG_OFFSET:
851 val = apic->tmr_div_cfg.val;
// Interrupt enable bitmap, one 32-bit word per register
855 val = *(uint32_t *)(apic->int_en_reg);
858 val = *(uint32_t *)(apic->int_en_reg + 4);
861 val = *(uint32_t *)(apic->int_en_reg + 8);
864 val = *(uint32_t *)(apic->int_en_reg + 12);
867 val = *(uint32_t *)(apic->int_en_reg + 16);
870 val = *(uint32_t *)(apic->int_en_reg + 20);
873 val = *(uint32_t *)(apic->int_en_reg + 24);
876 val = *(uint32_t *)(apic->int_en_reg + 28);
// In-service bitmap, one 32-bit word per register
880 val = *(uint32_t *)(apic->int_svc_reg);
883 val = *(uint32_t *)(apic->int_svc_reg + 4);
886 val = *(uint32_t *)(apic->int_svc_reg + 8);
889 val = *(uint32_t *)(apic->int_svc_reg + 12);
892 val = *(uint32_t *)(apic->int_svc_reg + 16);
895 val = *(uint32_t *)(apic->int_svc_reg + 20);
898 val = *(uint32_t *)(apic->int_svc_reg + 24);
901 val = *(uint32_t *)(apic->int_svc_reg + 28);
// Trigger mode bitmap, one 32-bit word per register
905 val = *(uint32_t *)(apic->trig_mode_reg);
908 val = *(uint32_t *)(apic->trig_mode_reg + 4);
911 val = *(uint32_t *)(apic->trig_mode_reg + 8);
914 val = *(uint32_t *)(apic->trig_mode_reg + 12);
917 val = *(uint32_t *)(apic->trig_mode_reg + 16);
920 val = *(uint32_t *)(apic->trig_mode_reg + 20);
923 val = *(uint32_t *)(apic->trig_mode_reg + 24);
926 val = *(uint32_t *)(apic->trig_mode_reg + 28);
// Interrupt request bitmap, one 32-bit word per register
930 val = *(uint32_t *)(apic->int_req_reg);
933 val = *(uint32_t *)(apic->int_req_reg + 4);
936 val = *(uint32_t *)(apic->int_req_reg + 8);
939 val = *(uint32_t *)(apic->int_req_reg + 12);
942 val = *(uint32_t *)(apic->int_req_reg + 16);
945 val = *(uint32_t *)(apic->int_req_reg + 20);
948 val = *(uint32_t *)(apic->int_req_reg + 24);
951 val = *(uint32_t *)(apic->int_req_reg + 28);
953 case TMR_CUR_CNT_OFFSET:
954 val = apic->tmr_cur_cnt;
957 // We are not going to implement these....
958 case THERM_LOC_VEC_TBL_OFFSET:
959 val = apic->therm_loc_vec_tbl.val;
961 case PERF_CTR_LOC_VEC_TBL_OFFSET:
962 val = apic->perf_ctr_loc_vec_tbl.val;
968 case INT_CMD_LO_OFFSET:
969 val = apic->int_cmd.lo;
971 case INT_CMD_HI_OFFSET:
972 val = apic->int_cmd.hi;
975 // handle current timer count
977 // Unhandled Registers
978 case EXT_INT_LOC_VEC_TBL_OFFSET0:
979 val = apic->ext_intr_vec_tbl[0].val;
981 case EXT_INT_LOC_VEC_TBL_OFFSET1:
982 val = apic->ext_intr_vec_tbl[1].val;
984 case EXT_INT_LOC_VEC_TBL_OFFSET2:
985 val = apic->ext_intr_vec_tbl[2].val;
987 case EXT_INT_LOC_VEC_TBL_OFFSET3:
988 val = apic->ext_intr_vec_tbl[3].val;
992 case EXT_APIC_FEATURE_OFFSET:
993 case EXT_APIC_CMD_OFFSET:
997 PrintError("apic %u: core %u: Read from Unhandled APIC Register: %x (getting zero)\n",
998 apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr);
// Single-byte access: copy the addressed byte of val
1004 uint_t byte_addr = reg_addr & 0x3;
1005 uint8_t * val_ptr = (uint8_t *)dst;
1007 *val_ptr = *(((uint8_t *)&val) + byte_addr);
// Two-byte access: allowed only when both bytes lie inside the register,
// i.e. the byte offset is 0, 1 or 2. The earlier test accepted only offset 3
// and indexed a uint16_t pointer by the byte offset, which read past val.
1009 } else if ((length == 2) &&
1010 ((reg_addr & 0x3) != 0x3)) {
1011 uint_t byte_addr = reg_addr & 0x3;
1012 uint16_t * val_ptr = (uint16_t *)dst;
// Shift instead of a pointer cast: selects the same two bytes on the
// little-endian layout the byte path above relies on, without a misaligned load
1013 *val_ptr = (uint16_t)(((uint32_t)val) >> (byte_addr * 8));
1015 } else if (length == 4) {
1016 uint32_t * val_ptr = (uint32_t *)dst;
1020 PrintError("apic %u: core %u: Invalid apic read length (%d)\n",
1021 apic->lapic_id.val, core->cpu_id, length);
1025 PrintDebug("apic %u: core %u: Read finished (val=%x)\n",
1026 apic->lapic_id.val, core->cpu_id, *(uint32_t *)dst);
/*
 * Memory hook for guest writes to the APIC register page.
 *
 * core:       core performing the access; selects which APIC is written
 * guest_addr: guest physical address of the access
 * src:        buffer holding the value being written
 * length:     access size in bytes
 * priv_data:  the struct apic_dev_state registered with the hook
 *
 * The register is selected by the offset from apic->base_addr. A write to
 * the low word of the interrupt command register stores the value and then
 * routes the resulting IPI through route_ipi().
 */
1035 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data) {
1036 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
1037 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1038 addr_t reg_addr = guest_addr - apic->base_addr;
1039 struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
1040 uint32_t op_val = *(uint32_t *)src;
1042 PrintDebug("apic %u: core %u: at %p and priv_data is at %p\n",
1043 apic->lapic_id.val, core->cpu_id, apic, priv_data);
1045 PrintDebug("Write to address space (%p) (val=%x)\n",
1046 (void *)guest_addr, *(uint32_t *)src);
1048 if (msr->apic_enable == 0) {
1049 PrintError("apic %u: core %u: Write to APIC address space with disabled APIC, apic msr=0x%llx\n",
1050 apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value);
// Arguments follow the format string order: apic id, core id, length.
// The last two were previously swapped.
1056 PrintError("apic %u: core %u: Invalid apic write length (%d)\n",
1057 apic->lapic_id.val, core->cpu_id, length);
1062 case REMOTE_READ_OFFSET:
1063 case APIC_VERSION_OFFSET:
1090 case EXT_APIC_FEATURE_OFFSET:
1092 PrintError("apic %u: core %u: Attempting to write to read only register %p (error)\n",
1093 apic->lapic_id.val, core->cpu_id, (void *)reg_addr);
1099 case APIC_ID_OFFSET:
1100 PrintDebug("apic %u: core %u: my id is being changed to %u\n",
1101 apic->lapic_id.val, core->cpu_id, op_val);
1103 apic->lapic_id.val = op_val;
1106 apic->task_prio.val = op_val;
1109 PrintDebug("apic %u: core %u: setting log_dst.val to 0x%x\n",
1110 apic->lapic_id.val, core->cpu_id, op_val);
1111 apic->log_dst.val = op_val;
1114 apic->dst_fmt.val = op_val;
1116 case SPURIOUS_INT_VEC_OFFSET:
1117 apic->spurious_int.val = op_val;
1120 apic->err_status.val = op_val;
1122 case TMR_LOC_VEC_TBL_OFFSET:
1123 apic->tmr_vec_tbl.val = op_val;
1125 case THERM_LOC_VEC_TBL_OFFSET:
1126 apic->therm_loc_vec_tbl.val = op_val;
1128 case PERF_CTR_LOC_VEC_TBL_OFFSET:
1129 apic->perf_ctr_loc_vec_tbl.val = op_val;
1131 case LINT0_VEC_TBL_OFFSET:
1132 apic->lint0_vec_tbl.val = op_val;
1134 case LINT1_VEC_TBL_OFFSET:
1135 apic->lint1_vec_tbl.val = op_val;
1137 case ERR_VEC_TBL_OFFSET:
1138 apic->err_vec_tbl.val = op_val;
// Writing the initial count also reloads the current count
1140 case TMR_INIT_CNT_OFFSET:
1141 apic->tmr_init_cnt = op_val;
1142 apic->tmr_cur_cnt = op_val;
1144 case TMR_CUR_CNT_OFFSET:
1145 apic->tmr_cur_cnt = op_val;
1147 case TMR_DIV_CFG_OFFSET:
1148 apic->tmr_div_cfg.val = op_val;
1152 // Enable mask (256 bits)
1154 *(uint32_t *)(apic->int_en_reg) = op_val;
1157 *(uint32_t *)(apic->int_en_reg + 4) = op_val;
1160 *(uint32_t *)(apic->int_en_reg + 8) = op_val;
1163 *(uint32_t *)(apic->int_en_reg + 12) = op_val;
1166 *(uint32_t *)(apic->int_en_reg + 16) = op_val;
1169 *(uint32_t *)(apic->int_en_reg + 20) = op_val;
1172 *(uint32_t *)(apic->int_en_reg + 24) = op_val;
1175 *(uint32_t *)(apic->int_en_reg + 28) = op_val;
1178 case EXT_INT_LOC_VEC_TBL_OFFSET0:
1179 apic->ext_intr_vec_tbl[0].val = op_val;
1181 case EXT_INT_LOC_VEC_TBL_OFFSET1:
1182 apic->ext_intr_vec_tbl[1].val = op_val;
1184 case EXT_INT_LOC_VEC_TBL_OFFSET2:
1185 apic->ext_intr_vec_tbl[2].val = op_val;
1187 case EXT_INT_LOC_VEC_TBL_OFFSET3:
1188 apic->ext_intr_vec_tbl[3].val = op_val;
// Writing the low command word triggers the send
1198 case INT_CMD_LO_OFFSET:
1199 apic->int_cmd.lo = op_val;
1201 PrintDebug("apic %u: core %u: sending cmd 0x%llx to apic %u\n",
1202 apic->lapic_id.val, core->cpu_id,
1203 apic->int_cmd.val, apic->int_cmd.dst);
1205 if (route_ipi(core, apic_dev, apic, apic->int_cmd.val) == -1) {
1206 PrintError("IPI Routing failure\n");
// The high command word is only stored; nothing is sent
1212 case INT_CMD_HI_OFFSET:
1213 apic->int_cmd.hi = op_val;
1217 // Unhandled Registers
1218 case EXT_APIC_CMD_OFFSET:
1221 PrintError("apic %u: core %u: Write to Unhandled APIC Register: %x (ignored)\n",
1222 apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr);
1227 PrintDebug("apic %u: core %u: Write finished\n", apic->lapic_id.val, core->cpu_id);
1234 /* Interrupt Controller Functions */
1236 // returns 1 if an interrupt is pending, 0 otherwise
// An interrupt counts as pending when a requested vector exists and its
// number is greater than the highest vector currently in service.
1237 static int apic_intr_pending(struct guest_info * core, void * private_data) {
1238 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1239 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1240 int req_irq = get_highest_irr(apic);
1241 int svc_irq = get_highest_isr(apic);
1243 // PrintDebug("apic %u: core %u: req_irq=%d, svc_irq=%d\n",apic->lapic_id.val,info->cpu_id,req_irq,svc_irq);
1245 if ((req_irq >= 0) &&
1246 (req_irq > svc_irq)) {
/*
 * Interrupt controller callback that selects the vector to inject next for
 * the calling core. It looks at the highest requested and highest in-service
 * vectors and distinguishes two cases: nothing in service (svc_irq == -1),
 * and an in-service vector lower than the requested one.
 * NOTE(review): presumably the requested vector is returned in both cases -
 * confirm.
 */
1253 static int apic_get_intr_number(struct guest_info * core, void * private_data) {
1254 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1255 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1256 int req_irq = get_highest_irr(apic);
1257 int svc_irq = get_highest_isr(apic);
1259 if (svc_irq == -1) {
1261 } else if (svc_irq < req_irq) {
/*
 * Raise interrupt vector irq on the APIC with index dst.
 *
 * vm:  VM the APIC device belongs to
 * dev: the APIC device; its private data is the struct apic_dev_state
 * irq: vector to mark as requested
 * dst: index of the destination APIC; must be below apic_dev->num_apics
 *
 * Returns -1 without touching any APIC when dst is out of range. If the
 * destination is not the CPU this code is running on, that CPU is
 * interrupted so it notices the new request.
 */
1269 int v3_apic_raise_intr(struct v3_vm_info * vm, struct vm_device * dev,
1270 uint32_t irq, uint32_t dst) {
1271 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(dev->private_data);
struct apic_state * apic = NULL;

// dst indexes apics[]; reject values that do not name an existing APIC
if (dst >= apic_dev->num_apics) {
PrintError("apic: Attempted to raise IRQ %u on unregistered apic id=%u\n", irq, dst);
return -1;
}

apic = &(apic_dev->apics[dst]);
1274 activate_apic_irq(apic, irq);
1276 if (V3_Get_CPU() != dst) {
1277 v3_interrupt_cpu(vm, dst, 0);
/*
 * Interrupt controller callback invoked when delivery of vector irq begins
 * on a core. If the vector is marked as requested on that core's APIC, it is
 * moved from the request bitmap to the in-service bitmap; otherwise only a
 * debug message is printed.
 */
1285 static int apic_begin_irq(struct guest_info * core, void * private_data, int irq) {
1286 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1287 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
// Byte index and bit index of the vector within the 32-byte bitmaps
1288 int major_offset = (irq & ~0x00000007) >> 3;
1289 int minor_offset = irq & 0x00000007;
1290 uint8_t * req_location = apic->int_req_reg + major_offset;
1291 uint8_t * svc_location = apic->int_svc_reg + major_offset;
1292 uint8_t flag = 0x01 << minor_offset;
1294 if (*req_location & flag) {
1295 // we will only pay attention to a begin irq if we
1296 // know that we initiated it!
1297 *svc_location |= flag;
1298 *req_location &= ~flag;
// The debug statement uses the "core" parameter; it used to name an undefined
// identifier and failed to build with CONFIG_DEBUG_APIC.
1301 PrintDebug("apic %u: core %u: begin irq for %d ignored since I don't own it\n",
1302 apic->lapic_id.val, core->cpu_id, irq);
1311 /* Timer Functions */
/*
 * Timer callback: advance the APIC timer of the calling core by cpu_cycles.
 *
 * The elapsed cycles are shifted right by an amount chosen from the divide
 * configuration to obtain timer ticks. If fewer ticks elapsed than remain in
 * the current count, the count is decremented. Otherwise the count reaches
 * zero, the timer interrupt is raised through activate_internal_irq(), and in
 * periodic mode the count is reloaded from the initial count minus the
 * leftover ticks.
 */
1312 static void apic_update_time(struct guest_info * core,
1313 uint64_t cpu_cycles, uint64_t cpu_freq,
1315 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
1316 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1318 // tmr_ticks is declared 64 or 32 bits wide depending on the build
1320 uint64_t tmr_ticks = 0;
1322 uint32_t tmr_ticks = 0;
// Low byte of the divide configuration register
1325 uint8_t tmr_div = *(uint8_t *)&(apic->tmr_div_cfg.val);
1326 uint_t shift_num = 0;
1329 // Check whether this is true:
1330 // -> If the Init count is zero then the timer is disabled
1331 // and doesn't just blitz interrupts to the CPU
1332 if ((apic->tmr_init_cnt == 0) ||
1333 ( (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_ONESHOT) &&
1334 (apic->tmr_cur_cnt == 0))) {
1335 //PrintDebug("apic %u: core %u: APIC timer not yet initialized\n",apic->lapic_id.val,core->cpu_id);
1353 case APIC_TMR_DIV16:
1356 case APIC_TMR_DIV32:
1359 case APIC_TMR_DIV64:
1362 case APIC_TMR_DIV128:
1366 PrintError("apic %u: core %u: Invalid Timer Divider configuration\n",
1367 apic->lapic_id.val, core->cpu_id);
1371 tmr_ticks = cpu_cycles >> shift_num;
1372 // PrintDebug("Timer Ticks: %p\n", (void *)tmr_ticks);
1374 if (tmr_ticks < apic->tmr_cur_cnt) {
1375 apic->tmr_cur_cnt -= tmr_ticks;
// Count expired: keep the leftover ticks for the periodic reload below
1377 tmr_ticks -= apic->tmr_cur_cnt;
1378 apic->tmr_cur_cnt = 0;
// The debug statements below use the "core" parameter; they used to name an
// undefined identifier and failed to build with CONFIG_DEBUG_APIC.
1381 PrintDebug("apic %u: core %u: Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n",
1382 apic->lapic_id.val, core->cpu_id,
1383 apic->tmr_vec_tbl.tmr_mode, apic->tmr_init_cnt, shift_num);
1385 if (apic_intr_pending(core, priv_data)) {
1386 PrintDebug("apic %u: core %u: Overriding pending IRQ %d\n",
1387 apic->lapic_id.val, core->cpu_id,
1388 apic_get_intr_number(core, priv_data));
1391 if (activate_internal_irq(apic, APIC_TMR_INT) == -1) {
1392 PrintError("apic %u: core %u: Could not raise Timer interrupt\n",
1393 apic->lapic_id.val, core->cpu_id);
// tmr_init_cnt is non-zero here because of the check near the top
1396 if (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_PERIODIC) {
1397 tmr_ticks = tmr_ticks % apic->tmr_init_cnt;
1398 apic->tmr_cur_cnt = apic->tmr_init_cnt - tmr_ticks;
// Interrupt controller callbacks; apic_init() registers this table for each
// core with v3_register_intr_controller().
1406 static struct intr_ctrl_ops intr_ops = {
1407 .intr_pending = apic_intr_pending,
1408 .get_intr_number = apic_get_intr_number,
1409 .begin_irq = apic_begin_irq,
// Timer callbacks; apic_init() registers this table for each core with
// v3_add_timer().
1413 static struct vm_timer_ops timer_ops = {
1414 .update_timer = apic_update_time,
// Device teardown: removes the hook on the APIC base MSR.
1420 static int apic_free(struct vm_device * dev) {
1422 /* TODO: This should crosscall to force an unhook on each CPU */
1424 // struct apic_state * apic = (struct apic_state *)dev->private_data;
1426 v3_unhook_msr(dev->vm, BASE_ADDR_MSR);
// Device operations table; apic_init() passes it to v3_allocate_device().
1432 static struct v3_device_ops dev_ops = {
/*
 * Device constructor: creates one APIC per core of the VM.
 *
 * Allocates the device state with room for vm->num_cores APICs and attaches
 * the device. For every core it then initializes the APIC state, registers
 * the interrupt controller and timer callbacks, and hooks the APIC register
 * page. Finally it hooks the APIC base MSR.
 *
 * Returns -1 if the device state cannot be allocated.
 */
1443 static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
1444 char * dev_id = v3_cfg_val(cfg, "ID");
1445 struct apic_dev_state * apic_dev = NULL;
1448 PrintDebug("apic: creating an APIC for each core\n");
1450 apic_dev = (struct apic_dev_state *)V3_Malloc(sizeof(struct apic_dev_state) +
1451 sizeof(struct apic_state) * vm->num_cores);
// Do not touch the state if the allocator returned NULL
if (apic_dev == NULL) {
PrintError("apic: Could not allocate APIC device state\n");
return -1;
}
1453 apic_dev->num_apics = vm->num_cores;
1455 struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, apic_dev);
1457 if (v3_attach_device(vm, dev) == -1) {
1458 PrintError("apic: Could not attach device %s\n", dev_id);
1463 for (i = 0; i < vm->num_cores; i++) {
1464 struct apic_state * apic = &(apic_dev->apics[i]);
1465 struct guest_info * core = &(vm->cores[i]);
// The core index is used as the APIC id
1469 init_apic_state(apic, i);
1471 v3_register_intr_controller(core, &intr_ops, apic_dev);
1473 v3_add_timer(core, &timer_ops, apic_dev);
1475 v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev);
1477 PrintDebug("apic %u: (setup device): done, my id is %u\n", i, apic->lapic_id.val);
1480 #ifdef CONFIG_DEBUG_APIC
1481 for (i = 0; i < vm->num_cores; i++) {
1482 struct apic_state * apic = &(apic_dev->apics[i]);
1483 PrintDebug("apic: sanity check: apic %u (at %p) has id %u and msr value %llx\n",
1484 i, apic, apic->lapic_id.val, apic->base_addr_msr.value);
1489 PrintDebug("apic: priv_data is at %p\n", apic_dev);
1491 v3_hook_msr(vm, BASE_ADDR_MSR, read_apic_msr, write_apic_msr, apic_dev);
// Registers apic_init() as the constructor for devices named "LAPIC".
1498 device_register("LAPIC", apic_init)