2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_dev_mgr.h>
21 #include <palacios/vmm_sprintf.h>
22 #include <palacios/vm_guest.h>
23 #include <devices/icc_bus.h>
24 #include <devices/apic_regs.h>
// When CONFIG_DEBUG_ICC_BUS is not defined, PrintDebug is defined as a macro
// with an empty expansion, so the PrintDebug(...) calls in this file expand to
// nothing.
// NOTE(review): the matching #endif is not visible in this view.
28 #ifndef CONFIG_DEBUG_ICC_BUS
30 #define PrintDebug(fmt, args...)
// Callback that deliver() hands to V3_Call_On_CPU together with the destination
// core pointer. In the lines visible here, its only action is to log a debug
// message with the core's cpu_id, and only when CONFIG_DEBUG_ICC_BUS is defined.
//
// p: opaque pointer; interpreted as a struct guest_info * in the debug build.
34 void v3_force_exit(void *p) {
35 #ifdef CONFIG_DEBUG_ICC_BUS
36 struct guest_info *core=(struct guest_info *)p;
38 PrintDebug("core %u: Forced to exit!\n",core->cpu_id);
// Data bundle for an IPI thunk; holds the target device.
// NOTE(review): several lines between "target" and "core" are missing from this
// view. The "core" and "ops" members below are accessed elsewhere in this file
// through struct apic_data pointers (dest_apic->core, dest_apic->ops), so they
// presumably belong to struct apic_data, whose header line is not visible here
// -- confirm against the full file.
41 struct ipi_thunk_data {
42 struct vm_device * target;
// Guest core associated with an APIC entry (read as dest_apic->core).
49 struct guest_info * core;
// Callback table for an APIC entry; raise_intr, should_deliver_flat and
// should_deliver_cluster are invoked through it in this file.
50 struct v3_icc_ops * ops;
// Per-device state of the ICC bus, stored as the vm_device's private_data.
// NOTE(review): the code below also reads and writes an "ioapic_id" member of
// this struct; its declaration is not visible in this view.
57 struct icc_bus_state {
// Table of APIC entries, indexed by APIC id (see v3_icc_register_apic).
58 struct apic_data apics[MAX_APICS];
// Device operations table passed to v3_allocate_device() in icc_bus_init().
// NOTE(review): the initializer contents are not visible in this view.
63 static struct v3_device_ops dev_ops = {
// Debug-only string tables used by the IPI trace message in v3_icc_send_ipi():
// shorthand_str is indexed by icr->dst_shorthand and deliverymode_str by
// icr->del_mode.
// NOTE(review): the table entries and the closing #endif are not visible in
// this view.
70 #ifdef CONFIG_DEBUG_ICC_BUS
71 static char *shorthand_str[] = {
78 static char *deliverymode_str[] = {
// Deliver a single interrupt message to one destination APIC, dispatching on
// the delivery mode in icr->del_mode.
//
// src_apic:  id of the sending APIC (compared against state->ioapic_id and the
//            destination core's cpu_id).
// dest_apic: destination APIC entry; its ops, core and priv_data are used.
// icr:       decoded interrupt command register (del_mode, vec are read here).
// state:     ICC bus state (ioapic_id is read here).
// extirq:    vector used instead of icr->vec when del_mode is 7.
//
// NOTE(review): several lines of this function (some case labels, the
// break/return statements and closing braces) are not visible in this view, so
// the return values and exact case boundaries cannot be documented from here.
91 static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cmd_reg *icr, struct icc_bus_state * state, uint32_t extirq) {
93 switch (icr->del_mode) {
96 case 1: // lowest priority
// Raise the interrupt on the destination APIC. The vector is icr->vec unless
// del_mode is 7, in which case extirq is used.
98 PrintDebug("icc_bus: delivering IRQ to core %u\n",dest_apic->core->cpu_id);
99 dest_apic->ops->raise_intr(dest_apic->core,
100 icr->del_mode!=7 ? icr->vec : extirq,
101 dest_apic->priv_data);
// If the sender is neither the ioapic nor the destination core itself, run
// v3_force_exit on the destination core's CPU.
102 if (src_apic!=state->ioapic_id && dest_apic->core->cpu_id != src_apic) {
103 // Assume core # is same as logical processor for now
104 // TODO FIX THIS FIX THIS
105 // THERE SHOULD BE: guestapicid->virtualapicid map,
106 // cpu_id->logical processor map
107 // host maintains logical proc->physical proc
108 PrintDebug("icc_bus: non-local core, forcing it to exit\n");
109 V3_Call_On_CPU(dest_apic->core->cpu_id,v3_force_exit,(void*)(dest_apic->core));
110 // TODO: do what the print says
// The following three delivery modes are only reported as unsupported.
115 PrintError("icc_bus: SMI delivery is unsupported\n");
120 PrintError("icc_bus: Reserved delivery mode 3 is unsupported\n");
125 PrintError("icc_bus: NMI delivery is unsupported\n");
// INIT delivery: move a core that is in INIT mode to SIPI mode.
130 struct guest_info *core = dest_apic->core;
132 PrintDebug("icc_bus: INIT delivery to core %u\n",core->cpu_id);
134 // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
// NOTE(review): the statements following this warning (presumably an early
// exit from the case) are not visible in this view.
137 if (core->cpu_mode != INIT) {
138 PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id);
139 // Only a warning, since INIT INIT SIPI is common
143 // We transition the target core to SIPI state
144 core->cpu_mode = SIPI; // note: locking should not be needed here
146 // That should be it since the target core should be
147 // waiting in host on this transition
148 // either it's on another core or on a different preemptive thread
149 // in both cases, it will quickly notice this transition
150 // in particular, we should not need to force an exit here
152 PrintDebug("icc_bus: INIT delivery done\n");
// SIPI (startup) delivery: set up the code segment from the vector and move a
// core that is in SIPI mode to REAL mode.
158 struct guest_info *core = dest_apic->core;
161 if (core->cpu_mode!=SIPI) {
162 PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id);
166 // Write the RIP, CS, and descriptor
167 // assume the rest is already good to go
169 // vector VV -> rip at 0
171 // This means we start executing at linear address VV000
173 // So the selector needs to be VV00
174 // and the base needs to be VV000
177 core->segments.cs.selector = icr->vec << 8;
178 core->segments.cs.limit = 0xffff;
179 core->segments.cs.base = icr->vec << 12;
181 PrintDebug("icc_bus: SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n",
182 icr->vec, core->segments.cs.selector, core->cpu_id);
183 // Maybe need to adjust the APIC?
185 // We transition the target core to REAL mode
186 core->cpu_mode = REAL; // note: locking should not be needed here
188 // As with INIT, we should not need to do anything else
190 PrintDebug("icc_bus: SIPI delivery done\n");
// Send an inter-processor interrupt over the ICC bus.
//
// Decodes icr_data and dfr_data, sanity-checks the sender and the destination,
// and then calls deliver() for every APIC selected by the destination
// shorthand, the destination mode (icr->dst_mode == 0 is treated as physical,
// otherwise logical) and, for logical mode, the destination format model.
//
// icc_bus:  ICC bus device; its private_data is the struct icc_bus_state.
// src_apic: id of the sending APIC; must be below MAX_APICS and either be
//           registered (present) or equal to the registered ioapic id.
// icr_data: raw interrupt command register value, viewed as struct int_cmd_reg.
// dfr_data: raw destination format register value, viewed as struct dst_fmt_reg.
// extirq:   passed through to deliver().
//
// NOTE(review): many lines of this function (return statements, some case
// labels, closing braces, loop index declarations) are not visible in this
// view, so return values cannot be documented from here.
//
201 // icr_data contains interrupt vector *except* for ext_int
202 // in which case it is given via irq
205 int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data,
206 uint32_t dfr_data, uint32_t extirq) {
208 PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq);
// View the raw register values through their bit-field structures.
210 struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
211 struct dst_fmt_reg *dfr = (struct dst_fmt_reg*)&dfr_data;
213 struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
214 struct apic_data * dest_apic = NULL;
// NOTE(review): this debug message repeats the one printed at the top of the
// function.
215 PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",
216 icc_bus, src_apic, icr_data, extirq);
218 // initial sanity checks
// Reject senders that are out of range, or that are neither a registered APIC
// nor the registered ioapic.
219 if ((src_apic >= MAX_APICS) ||
220 ((state->apics[src_apic].present == 0) &&
221 (src_apic != state->ioapic_id))) {
222 PrintError("icc_bus: Apparently sending from unregistered apic id=%u\n",src_apic);
// In physical destination mode the destination must be a registered APIC.
// NOTE(review): icr->dst indexes state->apics without a visible bound check
// against MAX_APICS -- confirm that the field width cannot exceed the table.
227 if ((icr->dst_mode == 0) && (state->apics[icr->dst].present == 0)) {
228 PrintError("icc_bus: Attempted send to unregistered apic id=%u\n", icr->dst);
// NOTE(review): the delivery paths below declare their own dest_apic variables,
// which shadow this one.
232 dest_apic = &(state->apics[icr->dst]);
235 PrintDebug("icc_bus: IPI %s %u from %s %u to %s %s %u (icr=0x%llx, dfr=0x%x) (extirq=%u)\n",
236 deliverymode_str[icr->del_mode], icr->vec,
237 src_apic==state->ioapic_id ? "ioapic" : "apic",
239 icr->dst_mode==0 ? "(physical)" : "(logical)",
240 shorthand_str[icr->dst_shorthand], icr->dst,icr->val, dfr->val,
// Messages addressed to the ioapic are reported and not delivered.
245 if (icr->dst==state->ioapic_id) {
246 PrintError("icc_bus: Attempted send to ioapic ignored\n");
253 switch (icr->dst_shorthand) {
255 case 0: // no shorthand
257 if (icr->dst_mode==0) {
// Physical mode: deliver to the single APIC named by icr->dst.
259 struct apic_data * dest_apic = &(state->apics[icr->dst]);
260 if (deliver(src_apic,dest_apic,icr,state,extirq)) {
// Logical mode: icr->dst is a message destination address that each candidate
// APIC is asked to match.
265 uint8_t mda = icr->dst; // message destination address, not physical address
267 if (dfr->model==0xf) {
269 // this means we deliver the IPI each destination APIC where
270 // mda of sender & ldr of receiver is nonzero
271 // mda=0xff means broadcast to all
274 for (i=0;i<MAX_APICS;i++) {
275 struct apic_data *dest_apic=&(state->apics[i]);
276 if (dest_apic->present &&
277 dest_apic->ops->should_deliver_flat(dest_apic->core,
279 dest_apic->priv_data)) {
280 if (deliver(src_apic,dest_apic,icr,state,extirq)) {
285 } else if (dfr->model==0x0) {
288 // there are two variants of this
290 // 1. (ancient P5/P6) All apics are on one bus
291 // mda[31:28] is the target cluster,
292 // mda[27:24] has one bit for each apic in the cluster
293 // mda[31:28] of sending apic == ldr[31:28] of dest apic means
294 // the dest apic is part of the cluster
295 // then mda[27:24] & ldr[27:24] nonzero means to deliver
296 // also, mda=0xff still means broadcast
297 // So, basically, you have 15 clusters of 4 apics each + broadcast
299 // 2. (current) hierarchical cluster model
300 // This is somewhat unclearly documented in volume 3, 9-32
301 // basically, you have a hierarchy of clusters where
302 // each cluster has 4 agents (APICs?) and a cluster manager.
303 // The cluster manager is not an apic, though, and outside of
304 // scope of documents. Again, you have 15 clusters of 4 apics
305 // each + broadcast. My impression is that this is identical
306 // to variant 1 for our purposes.
309 // if we are in lowest priority mode, we should just pick one
310 // according to the arbitration priority register
312 for (i=0;i<MAX_APICS;i++) {
313 struct apic_data *dest_apic=&(state->apics[i]);
314 if (dest_apic->present &&
315 dest_apic->ops->should_deliver_cluster(dest_apic->core,
317 dest_apic->priv_data)) {
318 if (deliver(src_apic,dest_apic,icr,state,extirq)) {
324 PrintError("icc_bus: unknown logical delivery model 0x%x\n", dfr->model);
// Self delivery: the message is delivered back to the sending APIC.
// NOTE(review): the case label for this branch is not visible in this view.
334 if (icr->dst_mode==0) {
// NOTE(review): this test compares the ICR destination field with the ioapic
// id, while the message refers to the sender; confirm whether src_apic was
// intended.
336 if (icr->dst==state->ioapic_id) {
337 PrintError("icc_bus: ioapic attempting to send to itself\n");
340 struct apic_data *dest_apic=&(state->apics[src_apic]);
341 if (deliver(src_apic,dest_apic,icr,state,extirq)) {
346 PrintError("icc_bus: use of logical delivery in self is not yet supported.\n");
// Broadcast: deliver to every registered APIC; the sender itself is included
// only when the shorthand value is 2.
354 case 3: { // all and all-but-me
355 // assuming that logical versus physical doesn't matter
356 // although it is odd that both are used
358 for (i=0;i<MAX_APICS;i++) {
359 struct apic_data *dest_apic=&(state->apics[i]);
360 if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) {
361 if (deliver(src_apic,dest_apic,icr,state,extirq)) {
// Register a local APIC with the ICC bus under the id apic_num.
//
// core:      guest core the APIC belongs to.
// icc_bus:   ICC bus device; its private_data is the struct icc_bus_state.
// apic_num:  index of the entry in the bus's apics table.
// ops:       callback table for the APIC.
// priv_data: opaque pointer stored in the entry and handed back to callbacks.
//
// An attempt to register an id whose entry is already marked present is
// reported as an error.
// NOTE(review): the remaining entry assignments and the return statements are
// not visible in this view. apic_num indexes the table without a visible check
// against MAX_APICS -- confirm that MAX_APICS covers the full uint8_t range.
379 /* THIS IS A BIG ASSUMPTION: APIC PHYSID == LOGID == CORENUM */
381 int v3_icc_register_apic(struct guest_info * core, struct vm_device * icc_bus,
382 uint8_t apic_num, struct v3_icc_ops * ops, void * priv_data) {
383 struct icc_bus_state * icc = (struct icc_bus_state *)icc_bus->private_data;
384 struct apic_data * apic = &(icc->apics[apic_num]);
386 if (apic->present == 1) {
387 PrintError("icc_bus: Attempt to re-register apic %u\n", apic_num);
392 apic->priv_data = priv_data;
396 PrintDebug("icc_bus: Registered apic %u\n", apic_num);
// Record apic_num as the id of the (single) ioapic on this ICC bus.
//
// vm:       VM the bus belongs to (not used in the lines visible here).
// icc_bus:  ICC bus device; its private_data is the struct icc_bus_state.
// apic_num: id to record as the ioapic id.
//
// NOTE(review): the return statements are not visible in this view.
402 int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, uint8_t apic_num)
404 struct icc_bus_state * icc = (struct icc_bus_state *)icc_bus->private_data;
// A non-zero ioapic_id is taken to mean that an ioapic is already registered.
// NOTE(review): the state is zero-filled in icc_bus_init(), so an ioapic
// registered with id 0 would look the same as "none registered" to this test.
406 if (icc->ioapic_id) {
407 PrintError("icc_bus: Attempt to register a second ioapic!\n");
411 icc->ioapic_id=apic_num;
413 PrintDebug("icc_bus: Registered ioapic %u\n", apic_num);
// Device init hook for the ICC bus: allocates and zero-fills the bus state,
// wraps it in a vm_device using dev_ops, and attaches that device to the VM.
//
// vm:  VM to attach the device to.
// cfg: configuration tree; its "ID" value is used as the device id.
//
// NOTE(review): the return statements are not visible in this view.
420 static int icc_bus_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
421 PrintDebug("icc_bus: Creating ICC_BUS\n");
423 char * dev_id = v3_cfg_val(cfg, "ID");
// NOTE(review): the allocation result is passed to memset without a NULL check.
425 struct icc_bus_state * icc_bus = (struct icc_bus_state *)V3_Malloc(sizeof(struct icc_bus_state));
426 memset(icc_bus, 0, sizeof(struct icc_bus_state));
428 struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, icc_bus);
// v3_attach_device() returning -1 is treated as failure and reported.
430 if (v3_attach_device(vm, dev) == -1) {
431 PrintError("icc_bus: Could not attach device %s\n", dev_id);
// Registers icc_bus_init as the init function for the device named "ICC_BUS".
440 device_register("ICC_BUS", icc_bus_init)