2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <devices/apic.h>
22 #include <devices/apic_regs.h>
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_msr.h>
25 #include <palacios/vmm_sprintf.h>
26 #include <palacios/vm_guest.h>
27 #include <palacios/vmm_types.h>
// Debug configuration: when CONFIG_DEBUG_APIC is not defined, PrintDebug is
// defined with an empty body so debug statements expand to nothing.
30 #ifndef CONFIG_DEBUG_APIC
32 #define PrintDebug(fmt, args...)
// Name tables used only for debug output; route_ipi indexes them with the
// ICR destination shorthand and delivery mode fields.
35 #ifdef CONFIG_DEBUG_APIC
36 static char * shorthand_str[] = {
43 static char * deliverymode_str[] = {
// Sources of locally generated APIC interrupts. activate_internal_irq uses
// this value to pick which local vector table entry to consult.
55 typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT,
56 APIC_LINT0_INT, APIC_LINT1_INT, APIC_ERR_INT } apic_irq_type_t;
// Delivery mode encodings. activate_internal_irq compares the msg_type field
// of a local vector table entry against APIC_FIXED_DELIVERY; only fixed
// delivery is handled there.
58 #define APIC_FIXED_DELIVERY 0x0
59 #define APIC_SMI_DELIVERY 0x2
60 #define APIC_NMI_DELIVERY 0x4
61 #define APIC_INIT_DELIVERY 0x5
62 #define APIC_EXTINT_DELIVERY 0x7
// MSR number hooked by apic_init for the APIC base address register.
65 #define BASE_ADDR_MSR 0x0000001B
// Base address each APIC is given by init_apic_state.
66 #define DEFAULT_BASE_ADDR 0xfee00000
// Register offsets relative to the APIC base address. apic_read and
// apic_write compute (guest_addr - base_addr) and use these as case labels.
68 #define APIC_ID_OFFSET 0x020
69 #define APIC_VERSION_OFFSET 0x030
70 #define TPR_OFFSET 0x080
71 #define APR_OFFSET 0x090
72 #define PPR_OFFSET 0x0a0
73 #define EOI_OFFSET 0x0b0
74 #define REMOTE_READ_OFFSET 0x0c0
75 #define LDR_OFFSET 0x0d0
76 #define DFR_OFFSET 0x0e0
77 #define SPURIOUS_INT_VEC_OFFSET 0x0f0
// In-service register: eight 32-bit words, one every 0x10 bytes.
79 #define ISR_OFFSET0 0x100 // 0x100 - 0x170
80 #define ISR_OFFSET1 0x110 // 0x100 - 0x170
81 #define ISR_OFFSET2 0x120 // 0x100 - 0x170
82 #define ISR_OFFSET3 0x130 // 0x100 - 0x170
83 #define ISR_OFFSET4 0x140 // 0x100 - 0x170
84 #define ISR_OFFSET5 0x150 // 0x100 - 0x170
85 #define ISR_OFFSET6 0x160 // 0x100 - 0x170
86 #define ISR_OFFSET7 0x170 // 0x100 - 0x170
// Trigger mode register: eight 32-bit words.
88 #define TRIG_OFFSET0 0x180 // 0x180 - 0x1f0
89 #define TRIG_OFFSET1 0x190 // 0x180 - 0x1f0
90 #define TRIG_OFFSET2 0x1a0 // 0x180 - 0x1f0
91 #define TRIG_OFFSET3 0x1b0 // 0x180 - 0x1f0
92 #define TRIG_OFFSET4 0x1c0 // 0x180 - 0x1f0
93 #define TRIG_OFFSET5 0x1d0 // 0x180 - 0x1f0
94 #define TRIG_OFFSET6 0x1e0 // 0x180 - 0x1f0
95 #define TRIG_OFFSET7 0x1f0 // 0x180 - 0x1f0
// Interrupt request register: eight 32-bit words.
98 #define IRR_OFFSET0 0x200 // 0x200 - 0x270
99 #define IRR_OFFSET1 0x210 // 0x200 - 0x270
100 #define IRR_OFFSET2 0x220 // 0x200 - 0x270
101 #define IRR_OFFSET3 0x230 // 0x200 - 0x270
102 #define IRR_OFFSET4 0x240 // 0x200 - 0x270
103 #define IRR_OFFSET5 0x250 // 0x200 - 0x270
104 #define IRR_OFFSET6 0x260 // 0x200 - 0x270
105 #define IRR_OFFSET7 0x270 // 0x200 - 0x270
// Error status, interrupt command, local vector table and timer registers.
108 #define ESR_OFFSET 0x280
109 #define INT_CMD_LO_OFFSET 0x300
110 #define INT_CMD_HI_OFFSET 0x310
111 #define TMR_LOC_VEC_TBL_OFFSET 0x320
112 #define THERM_LOC_VEC_TBL_OFFSET 0x330
113 #define PERF_CTR_LOC_VEC_TBL_OFFSET 0x340
114 #define LINT0_VEC_TBL_OFFSET 0x350
115 #define LINT1_VEC_TBL_OFFSET 0x360
116 #define ERR_VEC_TBL_OFFSET 0x370
117 #define TMR_INIT_CNT_OFFSET 0x380
118 #define TMR_CUR_CNT_OFFSET 0x390
119 #define TMR_DIV_CFG_OFFSET 0x3e0
// Extended APIC registers (offsets 0x400 and above).
120 #define EXT_APIC_FEATURE_OFFSET 0x400
121 #define EXT_APIC_CMD_OFFSET 0x410
122 #define SEOI_OFFSET 0x420
// Interrupt enable register: eight 32-bit words.
124 #define IER_OFFSET0 0x480 // 0x480 - 0x4f0
125 #define IER_OFFSET1 0x490 // 0x480 - 0x4f0
126 #define IER_OFFSET2 0x4a0 // 0x480 - 0x4f0
127 #define IER_OFFSET3 0x4b0 // 0x480 - 0x4f0
128 #define IER_OFFSET4 0x4c0 // 0x480 - 0x4f0
129 #define IER_OFFSET5 0x4d0 // 0x480 - 0x4f0
130 #define IER_OFFSET6 0x4e0 // 0x480 - 0x4f0
131 #define IER_OFFSET7 0x4f0 // 0x480 - 0x4f0
// Extended interrupt local vector table: four entries.
133 #define EXT_INT_LOC_VEC_TBL_OFFSET0 0x500 // 0x500 - 0x530
134 #define EXT_INT_LOC_VEC_TBL_OFFSET1 0x510 // 0x500 - 0x530
135 #define EXT_INT_LOC_VEC_TBL_OFFSET2 0x520 // 0x500 - 0x530
136 #define EXT_INT_LOC_VEC_TBL_OFFSET3 0x530 // 0x500 - 0x530
// Bit-field view of the APIC base address MSR (presumably struct apic_msr;
// the opening lines of the declaration are not part of this listing).
// apic_read and apic_write test apic_enable before touching any register.
143 uint8_t bootstrap_cpu : 1;
145 uint8_t apic_enable : 1;
146 uint64_t base_addr : 40;
148 } __attribute__((packed));
149 } __attribute__((packed));
150 } __attribute__((packed));
// Startup progress of a core as tracked by deliver_ipi: a core starts in
// INIT_ST (set by init_apic_state), an INIT IPI moves it to SIPI, and a
// startup IPI moves it to STARTED.
154 typedef enum {INIT_ST,
156 STARTED} ipi_state_t;
// Forward declaration of the per-VM device state defined further below.
158 struct apic_dev_state;
// Per-core local APIC state (one entry per core in apic_dev_state.apics[]).
// Copy of the APIC base MSR; its apic_enable bit gates apic_read/apic_write.
164 struct apic_msr base_addr_msr;
167 /* memory map registers */
169 struct lapic_id_reg lapic_id;
170 struct apic_ver_reg apic_ver;
171 struct ext_apic_ctrl_reg ext_apic_ctrl;
172 struct local_vec_tbl_reg local_vec_tbl;
173 struct tmr_vec_tbl_reg tmr_vec_tbl;
174 struct tmr_div_cfg_reg tmr_div_cfg;
175 struct lint_vec_tbl_reg lint0_vec_tbl;
176 struct lint_vec_tbl_reg lint1_vec_tbl;
177 struct perf_ctr_loc_vec_tbl_reg perf_ctr_loc_vec_tbl;
178 struct therm_loc_vec_tbl_reg therm_loc_vec_tbl;
179 struct err_vec_tbl_reg err_vec_tbl;
180 struct err_status_reg err_status;
181 struct spurious_int_reg spurious_int;
182 struct int_cmd_reg int_cmd;
183 struct log_dst_reg log_dst;
184 struct dst_fmt_reg dst_fmt;
185 struct arb_prio_reg arb_prio;
186 struct task_prio_reg task_prio;
187 struct proc_prio_reg proc_prio;
188 struct ext_apic_feature_reg ext_apic_feature;
189 struct spec_eoi_reg spec_eoi;
// Timer current count (decremented by apic_update_time) and reload value.
192 uint32_t tmr_cur_cnt;
193 uint32_t tmr_init_cnt;
196 struct local_vec_tbl_reg ext_intr_vec_tbl[4];
198 uint32_t rem_rd_data;
// INIT/SIPI startup progress, see deliver_ipi.
201 ipi_state_t ipi_state;
// 256-bit bitmaps, one bit per vector: byte index is vector / 8, bit index is
// vector % 8 (see activate_apic_irq and apic_begin_irq).
203 uint8_t int_req_reg[32];
204 uint8_t int_svc_reg[32];
205 uint8_t int_en_reg[32];
206 uint8_t trig_mode_reg[32];
// Core this APIC belongs to.
208 struct guest_info * core;
// Handle returned by v3_register_intr_controller; used by apic_free.
210 void * controller_handle;
// Timer returned by v3_add_timer; used by apic_free.
212 struct v3_timer * timer;
// Per-VM device state. apics[] is a zero-length trailing array; apic_init
// allocates the structure with room for one apic_state per core and the
// handlers index it by core->cpu_id.
222 struct apic_dev_state {
225 struct apic_state apics[0];
226 } __attribute__((packed));
// Forward declarations: write_apic_msr passes these handlers to
// v3_hook_full_mem before their definitions appear.
230 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data);
231 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data);
// Set one core's APIC state to its initial register values.
//   apic: state block to initialize
//   id:   value stored in the local APIC ID register
233 static void init_apic_state(struct apic_state * apic, uint32_t id) {
234 apic->base_addr = DEFAULT_BASE_ADDR;
// Two alternative MSR flag values follow; the condition selecting between
// them is not part of this listing.
237 // boot processor, enabled
238 apic->base_addr_msr.value = 0x0000000000000900LL;
240 // ap processor, enabled
241 apic->base_addr_msr.value = 0x0000000000000800LL;
244 // same base address regardless of ap or main
245 apic->base_addr_msr.value |= ((uint64_t)DEFAULT_BASE_ADDR);
247 PrintDebug("apic %u: (init_apic_state): msr=0x%llx\n",id, apic->base_addr_msr.value);
249 PrintDebug("apic %u: (init_apic_state): Sizeof Interrupt Request Register %d, should be 32\n",
250 id, (uint_t)sizeof(apic->int_req_reg));
// Request, in-service and trigger-mode bitmaps start clear; every vector
// starts enabled (enable bitmap filled with 0xff).
252 memset(apic->int_req_reg, 0, sizeof(apic->int_req_reg));
253 memset(apic->int_svc_reg, 0, sizeof(apic->int_svc_reg));
254 memset(apic->int_en_reg, 0xff, sizeof(apic->int_en_reg));
255 memset(apic->trig_mode_reg, 0, sizeof(apic->trig_mode_reg));
257 apic->eoi = 0x00000000;
258 apic->rem_rd_data = 0x00000000;
259 apic->tmr_init_cnt = 0x00000000;
260 apic->tmr_cur_cnt = 0x00000000;
262 apic->lapic_id.val = id;
// deliver_ipi expects INIT_ST before it will accept an INIT IPI silently.
264 apic->ipi_state = INIT_ST;
266 // The P6 has 6 LVT entries, so we set the value to (6-1)...
267 apic->apic_ver.val = 0x80050010;
269 apic->task_prio.val = 0x00000000;
270 apic->arb_prio.val = 0x00000000;
271 apic->proc_prio.val = 0x00000000;
272 apic->log_dst.val = 0x00000000;
273 apic->dst_fmt.val = 0xffffffff;
274 apic->spurious_int.val = 0x000000ff;
275 apic->err_status.val = 0x00000000;
276 apic->int_cmd.val = 0x0000000000000000LL;
// All local vector table entries start at 0x00010000 (presumably the mask
// bit set -- confirm against the register layouts in apic_regs.h).
277 apic->tmr_vec_tbl.val = 0x00010000;
278 apic->therm_loc_vec_tbl.val = 0x00010000;
279 apic->perf_ctr_loc_vec_tbl.val = 0x00010000;
280 apic->lint0_vec_tbl.val = 0x00010000;
281 apic->lint1_vec_tbl.val = 0x00010000;
282 apic->err_vec_tbl.val = 0x00010000;
283 apic->tmr_div_cfg.val = 0x00000000;
284 //apic->ext_apic_feature.val = 0x00000007;
285 apic->ext_apic_feature.val = 0x00040007;
286 apic->ext_apic_ctrl.val = 0x00000000;
287 apic->spec_eoi.val = 0x00000000;
289 v3_lock_init(&(apic->lock));
// MSR read hook for BASE_ADDR_MSR (installed by apic_init).
//   core:      core performing the read; selects the APIC by core->cpu_id
//   dst:       receives the value
//   priv_data: struct apic_dev_state * passed at hook time
// NOTE(review): the value returned is apic->base_addr, not
// apic->base_addr_msr.value, so the MSR flag bits are not reported -- confirm
// this is intended.
295 static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) {
296 struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
297 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
299 PrintDebug("apic %u: core %u: MSR read\n", apic->lapic_id.val, core->cpu_id);
301 dst->value = apic->base_addr;
302 v3_unlock(apic->lock);
// MSR write hook for BASE_ADDR_MSR: moves the APIC register page.
// Looks up the memory region currently covering apic->base_addr, deletes it,
// stores the new base and hooks a 4KB region at the new address with
// apic_read/apic_write.
//   core:      core performing the write; selects the APIC by core->cpu_id
//   src:       value written by the guest
//   priv_data: struct apic_dev_state * passed at hook time
// NOTE(review): src.value is stored into base_addr unmodified and then used
// as the hook address; if the guest value carries flag bits in its low bits
// (as the initial MSR value does) the hooked range would not be page aligned
// -- confirm.
307 static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) {
308 struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
309 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
310 struct v3_mem_region * old_reg = v3_get_mem_region(core->vm_info, core->cpu_id, apic->base_addr);
313 PrintDebug("apic %u: core %u: MSR write\n", apic->lapic_id.val, core->cpu_id);
// The old region must exist before it can be replaced.
315 if (old_reg == NULL) {
317 PrintError("apic %u: core %u: APIC Base address region does not exit...\n",
318 apic->lapic_id.val, core->cpu_id);
324 v3_delete_mem_region(core->vm_info, old_reg);
326 apic->base_addr = src.value;
328 if (v3_hook_full_mem(core->vm_info, core->cpu_id, apic->base_addr,
329 apic->base_addr + PAGE_SIZE_4KB,
330 apic_read, apic_write, apic_dev) == -1) {
331 PrintError("apic %u: core %u: Could not hook new APIC Base address\n",
332 apic->lapic_id.val, core->cpu_id);
// Release the lock on the failure path as well as on success.
333 v3_unlock(apic->lock);
337 v3_unlock(apic->lock);
// Mark vector irq_num as requested on the given APIC.
// The request bit is only set when the matching bit in the enable bitmap is
// set; a vector whose request bit is already set is reported as coalescing.
342 // irq_num is the bit offset into a 256 bit buffer...
343 static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) {
// major_offset = byte index (irq_num / 8), minor_offset = bit within the byte.
344 int major_offset = (irq_num & ~0x00000007) >> 3;
345 int minor_offset = irq_num & 0x00000007;
346 uint8_t * req_location = apic->int_req_reg + major_offset;
347 uint8_t * en_location = apic->int_en_reg + major_offset;
348 uint8_t flag = 0x1 << minor_offset;
// Validity check on irq_num (the condition itself is not part of this listing).
353 PrintError("apic %u: core %d: Attempting to raise an invalid interrupt: %d\n",
354 apic->lapic_id.val, apic->core->cpu_id, irq_num);
359 PrintDebug("apic %u: core %d: Raising APIC IRQ %d\n", apic->lapic_id.val, apic->core->cpu_id, irq_num);
361 if (*req_location & flag) {
362 PrintDebug("Interrupt %d coallescing\n", irq_num);
365 if (*en_location & flag) {
366 *req_location |= flag;
368 PrintDebug("apic %u: core %d: Interrupt not enabled... %.2x\n",
369 apic->lapic_id.val, apic->core->cpu_id,*en_location);
// Return the highest-numbered vector whose bit is set in the in-service
// bitmap. The value returned when no bit is set is not part of this listing
// (apic_get_intr_number tests the result against -1).
378 static int get_highest_isr(struct apic_state * apic) {
381 // We iterate backwards to find the highest priority
382 for (i = 31; i >= 0; i--) {
383 uint8_t * svc_major = apic->int_svc_reg + i;
// Skip bytes with no bits set, then scan the byte from bit 7 down.
385 if ((*svc_major) & 0xff) {
386 for (j = 7; j >= 0; j--) {
387 uint8_t flag = 0x1 << j;
388 if ((*svc_major) & flag) {
389 return ((i * 8) + j);
// Return the highest-numbered vector whose bit is set in the request bitmap.
// Same scan as get_highest_isr, applied to int_req_reg.
400 static int get_highest_irr(struct apic_state * apic) {
403 // We iterate backwards to find the highest priority
404 for (i = 31; i >= 0; i--) {
405 uint8_t * req_major = apic->int_req_reg + i;
// Skip bytes with no bits set, then scan the byte from bit 7 down.
407 if ((*req_major) & 0xff) {
408 for (j = 7; j >= 0; j--) {
409 uint8_t flag = 0x1 << j;
410 if ((*req_major) & flag) {
411 return ((i * 8) + j);
// Handle an end-of-interrupt: clear the in-service bit of the highest
// in-service vector.
423 static int apic_do_eoi(struct apic_state * apic) {
424 int isr_irq = get_highest_isr(apic);
// Locate the byte and bit of that vector in the in-service bitmap.
427 int major_offset = (isr_irq & ~0x00000007) >> 3;
428 int minor_offset = isr_irq & 0x00000007;
429 uint8_t flag = 0x1 << minor_offset;
430 uint8_t * svc_location = apic->int_svc_reg + major_offset;
432 PrintDebug("apic %u: core ?: Received APIC EOI for IRQ %d\n", apic->lapic_id.val,isr_irq);
434 *svc_location &= ~flag;
// Platform-specific handling of particular vectors, compiled only for
// CONFIG_CRAY_XT (the actions taken are not part of this listing).
436 #ifdef CONFIG_CRAY_XT
438 if ((isr_irq == 238) ||
440 PrintDebug("apic %u: core ?: Acking IRQ %d\n", apic->lapic_id.val,isr_irq);
443 if (isr_irq == 238) {
448 //PrintError("apic %u: core ?: Spurious EOI...\n",apic->lapic_id.val);
// Raise one of the APIC's locally generated interrupts.
// Reads the vector, delivery mode and mask from the local vector table entry
// matching int_type; only fixed delivery is forwarded to activate_apic_irq.
//   apic:     APIC raising the interrupt
//   int_type: which local source fired
455 static int activate_internal_irq(struct apic_state * apic, apic_irq_type_t int_type) {
456 uint32_t vec_num = 0;
457 uint32_t del_mode = 0;
// Timer entry: delivery mode is always treated as fixed.
463 vec_num = apic->tmr_vec_tbl.vec;
464 del_mode = APIC_FIXED_DELIVERY;
465 masked = apic->tmr_vec_tbl.mask;
// Thermal entry.
468 vec_num = apic->therm_loc_vec_tbl.vec;
469 del_mode = apic->therm_loc_vec_tbl.msg_type;
470 masked = apic->therm_loc_vec_tbl.mask;
// Performance counter entry.
473 vec_num = apic->perf_ctr_loc_vec_tbl.vec;
474 del_mode = apic->perf_ctr_loc_vec_tbl.msg_type;
475 masked = apic->perf_ctr_loc_vec_tbl.mask;
// LINT0 entry.
478 vec_num = apic->lint0_vec_tbl.vec;
479 del_mode = apic->lint0_vec_tbl.msg_type;
480 masked = apic->lint0_vec_tbl.mask;
// LINT1 entry.
483 vec_num = apic->lint1_vec_tbl.vec;
484 del_mode = apic->lint1_vec_tbl.msg_type;
485 masked = apic->lint1_vec_tbl.mask;
// Error entry: delivery mode is always treated as fixed.
488 vec_num = apic->err_vec_tbl.vec;
489 del_mode = APIC_FIXED_DELIVERY;
490 masked = apic->err_vec_tbl.mask;
493 PrintError("apic %u: core ?: Invalid APIC interrupt type\n", apic->lapic_id.val);
497 // interrupt is masked, don't send
499 PrintDebug("apic %u: core ?: Inerrupt is masked\n", apic->lapic_id.val);
503 if (del_mode == APIC_FIXED_DELIVERY) {
504 //PrintDebug("Activating internal APIC IRQ %d\n", vec_num);
505 return activate_apic_irq(apic, vec_num);
507 PrintError("apic %u: core ?: Unhandled Delivery Mode\n", apic->lapic_id.val);
// Cluster-model logical destination test.
// Accepts when the high nibble of mda equals the high nibble of the APIC's
// logical ID and the low nibbles share at least one set bit.
//   dst_core: core owning dst_apic (used for debug output)
//   dst_apic: candidate destination
//   mda:      destination field taken from the ICR by the caller
514 static inline int should_deliver_cluster_ipi(struct guest_info * dst_core,
515 struct apic_state * dst_apic, uint8_t mda) {
517 if ( ((mda & 0xf0) == (dst_apic->log_dst.dst_log_id & 0xf0)) && // (I am in the cluster and
518 ((mda & 0x0f) & (dst_apic->log_dst.dst_log_id & 0x0f)) ) { // I am in the set)
520 PrintDebug("apic %u core %u: accepting clustered IRQ (mda 0x%x == log_dst 0x%x)\n",
521 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
522 dst_apic->log_dst.dst_log_id);
526 PrintDebug("apic %u core %u: rejecting clustered IRQ (mda 0x%x != log_dst 0x%x)\n",
527 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
528 dst_apic->log_dst.dst_log_id);
// Flat-model logical destination test: accepts when the APIC's logical ID
// and mda share at least one set bit.
//   dst_core: core owning dst_apic (used for debug output)
//   dst_apic: candidate destination
//   mda:      destination field taken from the ICR by the caller
533 static inline int should_deliver_flat_ipi(struct guest_info * dst_core,
534 struct apic_state * dst_apic, uint8_t mda) {
536 if (dst_apic->log_dst.dst_log_id & mda) { // I am in the set
538 PrintDebug("apic %u core %u: accepting flat IRQ (mda 0x%x == log_dst 0x%x)\n",
539 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
540 dst_apic->log_dst.dst_log_id);
543 PrintDebug("apic %u core %u: rejecting flat IRQ (mda 0x%x != log_dst 0x%x)\n",
544 dst_apic->lapic_id.val, dst_core->cpu_id, mda,
545 dst_apic->log_dst.dst_log_id);
// Decide whether a logically addressed IPI should be accepted by dst_apic.
// Dispatches on the destination format register: model 0xf uses the flat
// test, model 0x0 uses the cluster test, anything else is reported as an
// error. route_ipi treats a result of 1 as "deliver" and -1 as failure.
552 static int should_deliver_ipi(struct guest_info * dst_core,
553 struct apic_state * dst_apic, uint8_t mda) {
556 if (dst_apic->dst_fmt.model == 0xf) {
559 // always deliver broadcast
563 return should_deliver_flat_ipi(dst_core, dst_apic, mda);
564 } else if (dst_apic->dst_fmt.model == 0x0) {
567 // always deliver broadcast
571 return should_deliver_cluster_ipi(dst_core, dst_apic, mda);
573 PrintError("apic %u core %u: invalid destination format register value 0x%x for logical mode delivery.\n",
574 dst_apic->lapic_id.val, dst_core->cpu_id, dst_apic->dst_fmt.model);
// Deliver one IPI to a single destination APIC.
//   src_apic: sending APIC (route_ipi may pass NULL for generic senders)
//   dst_apic: receiving APIC
//   vector:   vector number, or the startup page number for a startup IPI
//   del_mode: delivery mode from the ICR
// Handles ordinary interrupt delivery, INIT and startup IPIs; other modes are
// reported as unsupported.
580 static int deliver_ipi(struct apic_state * src_apic,
581 struct apic_state * dst_apic,
582 uint32_t vector, uint8_t del_mode) {
584 struct guest_info * dst_core = dst_apic->core;
589 case 1: // lowest priority
590 PrintDebug("delivering IRQ %d to core %u\n", vector, dst_core->cpu_id);
592 activate_apic_irq(dst_apic, vector);
594 if (dst_apic != src_apic) {
595 // Assume core # is same as logical processor for now
596 // TODO FIX THIS FIX THIS
597 // THERE SHOULD BE: guestapicid->virtualapicid map,
598 // cpu_id->logical processor map
599 // host maintains logical proc->physical proc
600 PrintDebug(" non-local core, forcing it to exit\n");
// The cross-CPU kick is only compiled in for CONFIG_MULTITHREAD_OS.
602 #ifdef CONFIG_MULTITHREAD_OS
603 v3_interrupt_cpu(dst_core->vm_info, dst_core->cpu_id, 0);
612 PrintDebug(" INIT delivery to core %u\n", dst_core->cpu_id);
614 // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
617 if (dst_apic->ipi_state != INIT_ST) {
618 PrintError(" Warning: core %u is not in INIT state (mode = %d), ignored\n",
619 dst_core->cpu_id, dst_apic->ipi_state);
620 // Only a warning, since INIT INIT SIPI is common
624 // We transition the target core to SIPI state
625 dst_apic->ipi_state = SIPI; // note: locking should not be needed here
627 // That should be it since the target core should be
628 // waiting in host on this transition
629 // either it's on another core or on a different preemptive thread
630 // in both cases, it will quickly notice this transition
631 // in particular, we should not need to force an exit here
633 PrintDebug(" INIT delivery done\n");
// Startup IPI: only honoured when the target has previously received INIT.
640 if (dst_apic->ipi_state != SIPI) {
641 PrintError(" core %u is not in SIPI state (mode = %d), ignored!\n",
642 dst_core->cpu_id, dst_apic->ipi_state);
646 // Write the RIP, CS, and descriptor
647 // assume the rest is already good to go
649 // vector VV -> rip at 0
651 // This means we start executing at linear address VV000
653 // So the selector needs to be VV00
654 // and the base needs to be VV000
657 dst_core->segments.cs.selector = vector << 8;
658 dst_core->segments.cs.limit = 0xffff;
659 dst_core->segments.cs.base = vector << 12;
661 PrintDebug(" SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n",
662 vector, dst_core->segments.cs.selector, dst_core->cpu_id);
663 // Maybe need to adjust the APIC?
665 // We mark the target core as running and its APIC as STARTED
666 dst_core->core_run_state = CORE_RUNNING; // note: locking should not be needed here
667 dst_apic->ipi_state = STARTED;
669 // As with INIT, we should not need to do anything else
671 PrintDebug(" SIPI delivery done\n");
680 PrintError("IPI %d delivery is unsupported\n", del_mode);
// Route an IPI described by an interrupt command register value to its
// destination APIC(s), based on the destination shorthand and mode.
//   apic_dev: device state holding every core's APIC
//   src_apic: sending APIC, or NULL when called from v3_apic_send_ipi
//   icr:      interrupt command register contents
// Error paths print a message; the calling code treats -1 as failure.
689 static int route_ipi(struct apic_dev_state * apic_dev,
690 struct apic_state * src_apic,
691 struct int_cmd_reg * icr) {
692 struct apic_state * dest_apic = NULL;
694 PrintDebug("route_ipi: src_apic=%p, icr_data=%p\n",
695 src_apic, (void *)(addr_t)icr->val);
// In physical mode the destination is used directly as an index into
// apics[], so it must be in range.
698 if ((icr->dst_mode == 0) && (icr->dst >= apic_dev->num_apics)) {
699 PrintError("route_ipi: Attempted send to unregistered apic id=%u\n",
704 dest_apic = &(apic_dev->apics[icr->dst]);
707 PrintDebug("route_ipi: IPI %s %u from apic %p to %s %s %u (icr=0x%llx)\n",
708 deliverymode_str[icr->del_mode],
711 (icr->dst_mode == 0) ? "(physical)" : "(logical)",
712 shorthand_str[icr->dst_shorthand],
716 switch (icr->dst_shorthand) {
718 case 0: // no shorthand
// Physical mode: deliver to the single APIC selected above.
719 if (icr->dst_mode == 0) {
722 if (deliver_ipi(src_apic, dest_apic,
723 icr->vec, icr->del_mode) == -1) {
724 PrintError("Error: Could not deliver IPI\n");
// Logical mode: offer the IPI to every APIC and deliver where accepted.
731 uint8_t mda = icr->dst;
733 for (i = 0; i < apic_dev->num_apics; i++) {
734 dest_apic = &(apic_dev->apics[i]);
735 int del_flag = should_deliver_ipi(dest_apic->core, dest_apic, mda);
737 if (del_flag == -1) {
738 PrintError("Error checking delivery mode\n");
740 } else if (del_flag == 1) {
741 if (deliver_ipi(src_apic, dest_apic,
742 icr->vec, icr->del_mode) == -1) {
743 PrintError("Error: Could not deliver IPI\n");
// Self shorthand: requires a real sending APIC.
754 if (src_apic == NULL) {
755 PrintError("Sending IPI to self from generic IPI sender\n");
759 if (icr->dst_mode == 0) {
760 if (deliver_ipi(src_apic, src_apic, icr->vec, icr->del_mode) == -1) {
761 PrintError("Could not deliver IPI\n");
766 PrintError("use of logical delivery in self is not yet supported.\n");
772 case 3: { // all and all-but-me
773 // assuming that logical versus physical doesn't matter
774 // although it is odd that both are used
777 for (i = 0; i < apic_dev->num_apics; i++) {
778 dest_apic = &(apic_dev->apics[i]);
// Shorthand 2 includes the sender; otherwise the sender is skipped.
780 if ((dest_apic != src_apic) || (icr->dst_shorthand == 2)) {
781 if (deliver_ipi(src_apic, dest_apic, icr->vec, icr->del_mode) == -1) {
782 PrintError("Error: Could not deliver IPI\n");
791 PrintError("Error routing IPI, invalid Mode (%d)\n", icr->dst_shorthand);
// Memory read hook for the APIC register page.
//   core:       core performing the access; selects the APIC by core->cpu_id
//   guest_addr: guest address of the access
//   dst:        buffer receiving `length` bytes
//   length:     access size in bytes (1, 2 or 4 are handled)
//   priv_data:  struct apic_dev_state * passed at hook time
// The register is selected by the 4-byte aligned offset from the APIC base;
// its 32-bit value is loaded into val and the requested bytes are then copied
// out of val according to the low two address bits.
801 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data) {
802 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
803 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
804 addr_t reg_addr = guest_addr - apic->base_addr;
805 struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
809 PrintDebug("apic %u: core %u: at %p: Read apic address space (%p)\n",
810 apic->lapic_id.val, core->cpu_id, apic, (void *)guest_addr);
// Accesses are rejected while the APIC is disabled in the base MSR.
812 if (msr->apic_enable == 0) {
813 PrintError("apic %u: core %u: Read from APIC address space with disabled APIC, apic msr=0x%llx\n",
814 apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value);
820 /* Because "May not be supported" doesn't matter to Linux developers... */
821 /* if (length != 4) { */
822 /* PrintError("Invalid apic read length (%d)\n", length); */
// Select the register by its 4-byte aligned offset.
826 switch (reg_addr & ~0x3) {
828 // Well, only an idiot would read from a architectural write only register
830 // PrintError("Attempting to read from write only register\n");
836 val = apic->lapic_id.val;
838 case APIC_VERSION_OFFSET:
839 val = apic->apic_ver.val;
842 val = apic->task_prio.val;
845 val = apic->arb_prio.val;
848 val = apic->proc_prio.val;
850 case REMOTE_READ_OFFSET:
851 val = apic->rem_rd_data;
854 val = apic->log_dst.val;
857 val = apic->dst_fmt.val;
859 case SPURIOUS_INT_VEC_OFFSET:
860 val = apic->spurious_int.val;
863 val = apic->err_status.val;
865 case TMR_LOC_VEC_TBL_OFFSET:
866 val = apic->tmr_vec_tbl.val;
868 case LINT0_VEC_TBL_OFFSET:
869 val = apic->lint0_vec_tbl.val;
871 case LINT1_VEC_TBL_OFFSET:
872 val = apic->lint1_vec_tbl.val;
874 case ERR_VEC_TBL_OFFSET:
875 val = apic->err_vec_tbl.val;
877 case TMR_INIT_CNT_OFFSET:
878 val = apic->tmr_init_cnt;
880 case TMR_DIV_CFG_OFFSET:
881 val = apic->tmr_div_cfg.val;
// Interrupt enable bitmap, one 32-bit word per register.
885 val = *(uint32_t *)(apic->int_en_reg);
888 val = *(uint32_t *)(apic->int_en_reg + 4);
891 val = *(uint32_t *)(apic->int_en_reg + 8);
894 val = *(uint32_t *)(apic->int_en_reg + 12);
897 val = *(uint32_t *)(apic->int_en_reg + 16);
900 val = *(uint32_t *)(apic->int_en_reg + 20);
903 val = *(uint32_t *)(apic->int_en_reg + 24);
906 val = *(uint32_t *)(apic->int_en_reg + 28);
// In-service bitmap, one 32-bit word per register.
910 val = *(uint32_t *)(apic->int_svc_reg);
913 val = *(uint32_t *)(apic->int_svc_reg + 4);
916 val = *(uint32_t *)(apic->int_svc_reg + 8);
919 val = *(uint32_t *)(apic->int_svc_reg + 12);
922 val = *(uint32_t *)(apic->int_svc_reg + 16);
925 val = *(uint32_t *)(apic->int_svc_reg + 20);
928 val = *(uint32_t *)(apic->int_svc_reg + 24);
931 val = *(uint32_t *)(apic->int_svc_reg + 28);
// Trigger mode bitmap, one 32-bit word per register.
935 val = *(uint32_t *)(apic->trig_mode_reg);
938 val = *(uint32_t *)(apic->trig_mode_reg + 4);
941 val = *(uint32_t *)(apic->trig_mode_reg + 8);
944 val = *(uint32_t *)(apic->trig_mode_reg + 12);
947 val = *(uint32_t *)(apic->trig_mode_reg + 16);
950 val = *(uint32_t *)(apic->trig_mode_reg + 20);
953 val = *(uint32_t *)(apic->trig_mode_reg + 24);
956 val = *(uint32_t *)(apic->trig_mode_reg + 28);
// Interrupt request bitmap, one 32-bit word per register.
960 val = *(uint32_t *)(apic->int_req_reg);
963 val = *(uint32_t *)(apic->int_req_reg + 4);
966 val = *(uint32_t *)(apic->int_req_reg + 8);
969 val = *(uint32_t *)(apic->int_req_reg + 12);
972 val = *(uint32_t *)(apic->int_req_reg + 16);
975 val = *(uint32_t *)(apic->int_req_reg + 20);
978 val = *(uint32_t *)(apic->int_req_reg + 24);
981 val = *(uint32_t *)(apic->int_req_reg + 28);
983 case TMR_CUR_CNT_OFFSET:
984 val = apic->tmr_cur_cnt;
987 // We are not going to implement these....
988 case THERM_LOC_VEC_TBL_OFFSET:
989 val = apic->therm_loc_vec_tbl.val;
991 case PERF_CTR_LOC_VEC_TBL_OFFSET:
992 val = apic->perf_ctr_loc_vec_tbl.val;
998 case INT_CMD_LO_OFFSET:
999 val = apic->int_cmd.lo;
1001 case INT_CMD_HI_OFFSET:
1002 val = apic->int_cmd.hi;
1005 // handle current timer count
1007 // Unhandled Registers
1008 case EXT_INT_LOC_VEC_TBL_OFFSET0:
1009 val = apic->ext_intr_vec_tbl[0].val;
1011 case EXT_INT_LOC_VEC_TBL_OFFSET1:
1012 val = apic->ext_intr_vec_tbl[1].val;
1014 case EXT_INT_LOC_VEC_TBL_OFFSET2:
1015 val = apic->ext_intr_vec_tbl[2].val;
1017 case EXT_INT_LOC_VEC_TBL_OFFSET3:
1018 val = apic->ext_intr_vec_tbl[3].val;
1022 case EXT_APIC_FEATURE_OFFSET:
1023 case EXT_APIC_CMD_OFFSET:
1027 PrintError("apic %u: core %u: Read from Unhandled APIC Register: %x (getting zero)\n",
1028 apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr);
// Single-byte read: pick the addressed byte out of val.
1034 uint_t byte_addr = reg_addr & 0x3;
1035 uint8_t * val_ptr = (uint8_t *)dst;
1037 *val_ptr = *(((uint8_t *)&val) + byte_addr);
// Two-byte read: valid at byte offsets 0, 1 and 2 of the register. Offset 3
// would run past the end of the register and is rejected with the
// invalid-length error below. The source bytes are addressed through a byte
// pointer so that byte_addr is a byte offset, not a uint16_t index.
1039 } else if ((length == 2) &&
1040 ((reg_addr & 0x3) != 0x3)) {
1041 uint_t byte_addr = reg_addr & 0x3;
1042 uint16_t * val_ptr = (uint16_t *)dst;
1043 *val_ptr = *(uint16_t *)(((uint8_t *)&val) + byte_addr);
1045 } else if (length == 4) {
1046 uint32_t * val_ptr = (uint32_t *)dst;
1050 PrintError("apic %u: core %u: Invalid apic read length (%d)\n",
1051 apic->lapic_id.val, core->cpu_id, length);
// NOTE(review): this debug print reads 4 bytes from dst regardless of length.
1055 PrintDebug("apic %u: core %u: Read finished (val=%x)\n",
1056 apic->lapic_id.val, core->cpu_id, *(uint32_t *)dst);
// Memory write hook for the APIC register page.
//   core:       core performing the access; selects the APIC by core->cpu_id
//   guest_addr: guest address of the access
//   src:        buffer holding the value being written
//   length:     access size in bytes
//   priv_data:  struct apic_dev_state * passed at hook time
// Writing the low half of the interrupt command register triggers IPI
// routing via route_ipi.
1065 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data) {
1066 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
1067 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1068 addr_t reg_addr = guest_addr - apic->base_addr;
1069 struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
// NOTE(review): 4 bytes are read from src here, before length is validated.
1070 uint32_t op_val = *(uint32_t *)src;
1072 PrintDebug("apic %u: core %u: at %p and priv_data is at %p\n",
1073 apic->lapic_id.val, core->cpu_id, apic, priv_data);
1075 PrintDebug("apic %u: core %u: write to address space (%p) (val=%x)\n",
1076 apic->lapic_id.val, core->cpu_id, (void *)guest_addr, *(uint32_t *)src);
// Accesses are rejected while the APIC is disabled in the base MSR.
1078 if (msr->apic_enable == 0) {
1079 PrintError("apic %u: core %u: Write to APIC address space with disabled APIC, apic msr=0x%llx\n",
1080 apic->lapic_id.val, core->cpu_id, apic->base_addr_msr.value);
// Arguments follow the order of the format string: apic id, core id, length.
1086 PrintError("apic %u: core %u: Invalid apic write length (%d)\n",
1087 apic->lapic_id.val, core->cpu_id, length);
// Read-only registers: a write is reported as an error.
1092 case REMOTE_READ_OFFSET:
1093 case APIC_VERSION_OFFSET:
1120 case EXT_APIC_FEATURE_OFFSET:
1122 PrintError("apic %u: core %u: Attempting to write to read only register %p (error)\n",
1123 apic->lapic_id.val, core->cpu_id, (void *)reg_addr);
// Plain data registers: the written value is stored as-is.
1129 case APIC_ID_OFFSET:
1130 PrintDebug("apic %u: core %u: my id is being changed to %u\n",
1131 apic->lapic_id.val, core->cpu_id, op_val);
1133 apic->lapic_id.val = op_val;
1136 apic->task_prio.val = op_val;
1139 PrintDebug("apic %u: core %u: setting log_dst.val to 0x%x\n",
1140 apic->lapic_id.val, core->cpu_id, op_val);
1141 apic->log_dst.val = op_val;
1144 apic->dst_fmt.val = op_val;
1146 case SPURIOUS_INT_VEC_OFFSET:
1147 apic->spurious_int.val = op_val;
1150 apic->err_status.val = op_val;
1152 case TMR_LOC_VEC_TBL_OFFSET:
1153 apic->tmr_vec_tbl.val = op_val;
1155 case THERM_LOC_VEC_TBL_OFFSET:
1156 apic->therm_loc_vec_tbl.val = op_val;
1158 case PERF_CTR_LOC_VEC_TBL_OFFSET:
1159 apic->perf_ctr_loc_vec_tbl.val = op_val;
1161 case LINT0_VEC_TBL_OFFSET:
1162 apic->lint0_vec_tbl.val = op_val;
1164 case LINT1_VEC_TBL_OFFSET:
1165 apic->lint1_vec_tbl.val = op_val;
1167 case ERR_VEC_TBL_OFFSET:
1168 apic->err_vec_tbl.val = op_val;
// Writing the initial count also reloads the current count.
1170 case TMR_INIT_CNT_OFFSET:
1171 apic->tmr_init_cnt = op_val;
1172 apic->tmr_cur_cnt = op_val;
1174 case TMR_CUR_CNT_OFFSET:
1175 apic->tmr_cur_cnt = op_val;
1177 case TMR_DIV_CFG_OFFSET:
1178 apic->tmr_div_cfg.val = op_val;
1182 // Enable mask (256 bits)
1184 *(uint32_t *)(apic->int_en_reg) = op_val;
1187 *(uint32_t *)(apic->int_en_reg + 4) = op_val;
1190 *(uint32_t *)(apic->int_en_reg + 8) = op_val;
1193 *(uint32_t *)(apic->int_en_reg + 12) = op_val;
1196 *(uint32_t *)(apic->int_en_reg + 16) = op_val;
1199 *(uint32_t *)(apic->int_en_reg + 20) = op_val;
1202 *(uint32_t *)(apic->int_en_reg + 24) = op_val;
1205 *(uint32_t *)(apic->int_en_reg + 28) = op_val;
1208 case EXT_INT_LOC_VEC_TBL_OFFSET0:
1209 apic->ext_intr_vec_tbl[0].val = op_val;
1211 case EXT_INT_LOC_VEC_TBL_OFFSET1:
1212 apic->ext_intr_vec_tbl[1].val = op_val;
1214 case EXT_INT_LOC_VEC_TBL_OFFSET2:
1215 apic->ext_intr_vec_tbl[2].val = op_val;
1217 case EXT_INT_LOC_VEC_TBL_OFFSET3:
1218 apic->ext_intr_vec_tbl[3].val = op_val;
// Writing the low ICR word sends the IPI described by the full ICR.
1228 case INT_CMD_LO_OFFSET:
1229 apic->int_cmd.lo = op_val;
1231 PrintDebug("apic %u: core %u: sending cmd 0x%llx to apic %u\n",
1232 apic->lapic_id.val, core->cpu_id,
1233 apic->int_cmd.val, apic->int_cmd.dst);
1235 if (route_ipi(apic_dev, apic, &(apic->int_cmd)) == -1) {
1236 PrintError("IPI Routing failure\n");
// The high ICR word is only stored; no IPI is sent until the low word is written.
1242 case INT_CMD_HI_OFFSET:
1243 apic->int_cmd.hi = op_val;
1247 // Unhandled Registers
1248 case EXT_APIC_CMD_OFFSET:
1251 PrintError("apic %u: core %u: Write to Unhandled APIC Register: %x (ignored)\n",
1252 apic->lapic_id.val, core->cpu_id, (uint32_t)reg_addr);
1257 PrintDebug("apic %u: core %u: Write finished\n", apic->lapic_id.val, core->cpu_id);
1264 /* Interrupt Controller Functions */
// Interrupt controller callback (registered through intr_ops).
// An interrupt counts as pending when some vector is requested and it is
// numerically higher than the highest vector currently in service.
1266 // returns 1 if an interrupt is pending, 0 otherwise
1267 static int apic_intr_pending(struct guest_info * core, void * private_data) {
1268 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1269 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1270 int req_irq = get_highest_irr(apic);
1271 int svc_irq = get_highest_isr(apic);
1273 // PrintDebug("apic %u: core %u: req_irq=%d, svc_irq=%d\n",apic->lapic_id.val,info->cpu_id,req_irq,svc_irq);
1275 if ((req_irq >= 0) &&
1276 (req_irq > svc_irq)) {
// Interrupt controller callback (registered through intr_ops): selects the
// vector to inject by comparing the highest requested vector with the highest
// in-service vector. The values returned by each branch are not part of this
// listing.
1283 static int apic_get_intr_number(struct guest_info * core, void * private_data) {
1284 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1285 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
1286 int req_irq = get_highest_irr(apic);
1287 int svc_irq = get_highest_isr(apic);
// Case 1: nothing is in service. Case 2: the request outranks what is in service.
1289 if (svc_irq == -1) {
1291 } else if (svc_irq < req_irq) {
// Send an IPI on behalf of a caller outside the APIC device.
// Builds a temporary interrupt command register from the generic IPI
// description and routes it with no source APIC (src_apic == NULL).
//   vm:       VM the IPI belongs to (not referenced in the visible lines)
//   ipi:      generic IPI description
//   dev_data: struct apic_dev_state *
// Returns the result of route_ipi.
1299 int v3_apic_send_ipi(struct v3_vm_info * vm, struct v3_gen_ipi * ipi, void * dev_data) {
1300 struct apic_dev_state * apic_dev = (struct apic_dev_state *)dev_data;
1301 struct int_cmd_reg tmp_icr;
1303 // zero out all the fields
1307 tmp_icr.vec = ipi->vector;
1308 tmp_icr.del_mode = ipi->mode;
1309 tmp_icr.dst_mode = ipi->logical;
1310 tmp_icr.trig_mode = ipi->trigger_mode;
1311 tmp_icr.dst_shorthand = ipi->dst_shorthand;
1312 tmp_icr.dst = ipi->dst;
1315 return route_ipi(apic_dev, NULL, &tmp_icr);
// Raise interrupt vector irq on the APIC of core dst.
//   vm:       VM the destination core belongs to
//   irq:      vector to raise
//   dst:      index of the destination APIC in apics[]
//   dev_data: struct apic_dev_state *
// Returns -1 when dst does not name a registered APIC.
1319 int v3_apic_raise_intr(struct v3_vm_info * vm, uint32_t irq, uint32_t dst, void * dev_data) {
1320 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(dev_data);
1321 struct apic_state * apic = NULL;
// dst indexes apics[] directly, so reject out-of-range values before any
// access (route_ipi applies the same check for physical destinations).
if (dst >= apic_dev->num_apics) {
PrintError("apic: Attempted to raise IRQ %u on unregistered apic id=%u\n", irq, dst);
return -1;
}
apic = &(apic_dev->apics[dst]);
1323 PrintDebug("apic %u core ?: raising interrupt IRQ %u (dst = %u).\n", apic->lapic_id.val, irq, dst);
1325 activate_apic_irq(apic, irq);
// When the destination is another CPU, kick it; the call is only compiled in
// for CONFIG_MULTITHREAD_OS.
1327 if (V3_Get_CPU() != dst) {
1328 #ifdef CONFIG_MULTITHREAD_OS
1329 v3_interrupt_cpu(vm, dst, 0);
// Interrupt controller callback (registered through intr_ops) for the start
// of servicing vector irq: moves the vector from the request bitmap to the
// in-service bitmap, but only if this APIC had it requested.
1340 static int apic_begin_irq(struct guest_info * core, void * private_data, int irq) {
1341 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
1342 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
// Byte index and bit mask of the vector within the 256-bit bitmaps.
1343 int major_offset = (irq & ~0x00000007) >> 3;
1344 int minor_offset = irq & 0x00000007;
1345 uint8_t * req_location = apic->int_req_reg + major_offset;
1346 uint8_t * svc_location = apic->int_svc_reg + major_offset;
1347 uint8_t flag = 0x01 << minor_offset;
1349 if (*req_location & flag) {
1350 // we will only pay attention to a begin irq if we
1351 // know that we initiated it!
1352 *svc_location |= flag;
1353 *req_location &= ~flag;
1356 //PrintDebug("apic %u: core %u: begin irq for %d ignored since I don't own it\n",
1357 // apic->lapic_id.val, core->cpu_id, irq);
1366 /* Timer Functions */
// Timer callback (registered through timer_ops): advances the APIC timer by
// the elapsed CPU cycles, scaled down by the configured divider, and raises
// the timer interrupt when the current count runs out.
//   core:       core whose APIC timer is updated
//   cpu_cycles: cycles elapsed since the previous update
//   cpu_freq:   not referenced in the visible lines
1367 static void apic_update_time(struct guest_info * core,
1368 uint64_t cpu_cycles, uint64_t cpu_freq,
1370 struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
1371 struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
// tmr_ticks is declared as 64-bit or 32-bit depending on a preprocessor
// condition that is not part of this listing.
1373 // The 32 bit GCC runtime is a pile of shit
1375 uint64_t tmr_ticks = 0;
1377 uint32_t tmr_ticks = 0;
1380 uint8_t tmr_div = *(uint8_t *)&(apic->tmr_div_cfg.val);
1381 uint_t shift_num = 0;
1384 // Check whether this is true:
1385 // -> If the Init count is zero then the timer is disabled
1386 // and doesn't just blitz interrupts to the CPU
1387 if ((apic->tmr_init_cnt == 0) ||
1388 ( (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_ONESHOT) &&
1389 (apic->tmr_cur_cnt == 0))) {
1390 //PrintDebug("apic %u: core %u: APIC timer not yet initialized\n",apic->lapic_id.val,info->cpu_id);
// Divider selection; the shift amounts chosen per case are not part of this listing.
1408 case APIC_TMR_DIV16:
1411 case APIC_TMR_DIV32:
1414 case APIC_TMR_DIV64:
1417 case APIC_TMR_DIV128:
1421 PrintError("apic %u: core %u: Invalid Timer Divider configuration\n",
1422 apic->lapic_id.val, core->cpu_id);
1426 tmr_ticks = cpu_cycles >> shift_num;
1427 // PrintDebug("Timer Ticks: %p\n", (void *)tmr_ticks);
// Not expired yet: just count down. Otherwise keep the overshoot in
// tmr_ticks and fire the interrupt.
1429 if (tmr_ticks < apic->tmr_cur_cnt) {
1430 apic->tmr_cur_cnt -= tmr_ticks;
1432 tmr_ticks -= apic->tmr_cur_cnt;
1433 apic->tmr_cur_cnt = 0;
1436 PrintDebug("apic %u: core %u: Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n",
1437 apic->lapic_id.val, core->cpu_id,
1438 apic->tmr_vec_tbl.tmr_mode, apic->tmr_init_cnt, shift_num);
1440 if (apic_intr_pending(core, priv_data)) {
1441 PrintDebug("apic %u: core %u: Overriding pending IRQ %d\n",
1442 apic->lapic_id.val, core->cpu_id,
1443 apic_get_intr_number(core, priv_data));
1446 if (activate_internal_irq(apic, APIC_TMR_INT) == -1) {
1447 PrintError("apic %u: core %u: Could not raise Timer interrupt\n",
1448 apic->lapic_id.val, core->cpu_id);
// Periodic mode: reload from the initial count, less the overshoot modulo
// one period. tmr_init_cnt is non-zero here because of the early check above.
1451 if (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_PERIODIC) {
1452 tmr_ticks = tmr_ticks % apic->tmr_init_cnt;
1453 apic->tmr_cur_cnt = apic->tmr_init_cnt - tmr_ticks;
// Interrupt controller callbacks handed to v3_register_intr_controller in
// apic_init.
1461 static struct intr_ctrl_ops intr_ops = {
1462 .intr_pending = apic_intr_pending,
1463 .get_intr_number = apic_get_intr_number,
1464 .begin_irq = apic_begin_irq,
// Timer callbacks handed to v3_add_timer in apic_init.
1468 static struct v3_timer_ops timer_ops = {
1469 .update_timer = apic_update_time,
// Device teardown: for every APIC, remove its interrupt controller and timer
// from the owning core, then unhook the APIC base MSR.
1475 static int apic_free(struct apic_dev_state * apic_dev) {
1477 struct v3_vm_info * vm = NULL;
1479 for (i = 0; i < apic_dev->num_apics; i++) {
1480 struct apic_state * apic = &(apic_dev->apics[i]);
1481 struct guest_info * core = apic->core;
1485 v3_remove_intr_controller(core, apic->controller_handle);
1488 v3_remove_timer(core, apic->timer);
1495 v3_unhook_msr(vm, BASE_ADDR_MSR);
// Device operations handed to v3_add_device in apic_init; apic_free is cast
// to the generic free signature taking void *.
1502 static struct v3_device_ops dev_ops = {
1503 .free = (int (*)(void *))apic_free,
// Device constructor: creates one APIC per core of the VM.
// Allocates the device state, attaches the device, and for each core
// initializes its APIC, registers the interrupt controller and timer, and
// hooks the APIC register page. Finally hooks the APIC base MSR.
//   vm:  VM the device is added to
//   cfg: configuration subtree; only the "ID" value is read here
1510 static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
1511 char * dev_id = v3_cfg_val(cfg, "ID");
1512 struct apic_dev_state * apic_dev = NULL;
1515 PrintDebug("apic: creating an APIC for each core\n");
// The device state ends in a zero-length apics[] array, so the allocation
// adds room for one apic_state per core.
1517 apic_dev = (struct apic_dev_state *)V3_Malloc(sizeof(struct apic_dev_state) +
1518 sizeof(struct apic_state) * vm->num_cores);
// Fail cleanly instead of dereferencing a NULL pointer when allocation fails.
if (apic_dev == NULL) {
PrintError("apic: Could not allocate APIC device state\n");
return -1;
}
1520 apic_dev->num_apics = vm->num_cores;
1522 struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, apic_dev);
1525 PrintError("apic: Could not attach device %s\n", dev_id);
1531 for (i = 0; i < vm->num_cores; i++) {
1532 struct apic_state * apic = &(apic_dev->apics[i]);
1533 struct guest_info * core = &(vm->cores[i]);
// The APIC id is the core index.
1537 init_apic_state(apic, i);
1539 apic->controller_handle = v3_register_intr_controller(core, &intr_ops, apic_dev);
1541 apic->timer = v3_add_timer(core, &timer_ops, apic_dev);
1543 if (apic->timer == NULL) {
1544 PrintError("APIC: Failed to attach timer to core %d\n", i);
1545 v3_remove_device(dev);
// NOTE(review): the result of this hook is not checked, unlike the
// equivalent call in write_apic_msr.
1549 v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev);
1551 PrintDebug("apic %u: (setup device): done, my id is %u\n", i, apic->lapic_id.val);
1554 #ifdef CONFIG_DEBUG_APIC
1555 for (i = 0; i < vm->num_cores; i++) {
1556 struct apic_state * apic = &(apic_dev->apics[i]);
1557 PrintDebug("apic: sanity check: apic %u (at %p) has id %u and msr value %llx\n",
1558 i, apic, apic->lapic_id.val, apic->base_addr_msr.value);
1563 PrintDebug("apic: priv_data is at %p\n", apic_dev);
1565 v3_hook_msr(vm, BASE_ADDR_MSR, read_apic_msr, write_apic_msr, apic_dev);
// Registers apic_init under the device name "LAPIC" (presumably so the device
// framework can construct it from the VM configuration -- confirm against the
// device_register macro definition).
1572 device_register("LAPIC", apic_init)