help
This enables the necesary options to compile Palacios with Kitten
+config LINUX
+ bool "Linux 2.6"
+ select BUILT_IN_STDLIB
+ select BUILT_IN_ATOI
+ help
+ This enables the necessary options to compile Palacios with Linux 2.6
+ Currently, this is in development, and only 2.6.32 is verified to work
+
config MINIX
bool "MINIX 3"
help
bool "Other OS"
help
Choose this to compile Palacios for a Generic Host OS
+ (Formerly GeekOS)
endchoice
struct {
uint_t vec : 8;
- uint_t msg_type : 3;
+ uint_t del_mode : 3;
uint_t dst_mode : 1;
uint_t del_status : 1;
uint_t rsvd1 : 1;
/**
*
*/
-int v3_icc_register_apic(struct guest_info * vm, struct vm_device * icc_bus, uint8_t apic_phys_id, struct v3_icc_ops * ops, void * priv_data);
-
+int v3_icc_register_apic(struct guest_info *core, struct vm_device * icc_bus, uint8_t apic_phys_id, struct v3_icc_ops * ops, void * priv_data);
+int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, uint8_t apic_phys_id);
/**
- * Send an inter-processor interrupt (IPI) from this local APIC to another local APIC.
+ * Send an inter-processor interrupt (IPI) from one local APIC to another local APIC.
*
- * @param icc_bus - The ICC bus that routes IPIs.
+ * @param icc_bus - The ICC bus that routes IPIs.
+ * @param apic_src - The source APIC id.
* @param apic_num - The remote APIC number.
- * @param intr_num - The interrupt number.
+ * @param icr - A copy of the APIC's ICR. (LAPIC-style ICR, clone from redir table for ioapics)
*/
-int v3_icc_send_irq(struct vm_device * icc_bus, uint8_t apic_num, uint32_t irq_num);
+int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t apic_src, uint64_t icr);
+
+#if 0
+/**
+ * Send an IRQ from an IOAPIC to a local APIC.
+ *
+ * @param icc_bus - The ICC bus that routes IPIs.
+ * @param apic_src - The source APIC id.
+ * @param apic_num - The remote APIC number.
+ * @param icrlo - The low 32 bits of the APIC's ICR.
+ */
+int v3_icc_send_irq(struct vm_device * icc_bus, uint32_t ioapic_src, uint8_t apic_num, uint8_t irq);
+#endif
#endif /* ICC_BUS_H_ */
#define PCI_AUTO_DEV_NUM (-1)
+struct guest_info;
+
struct pci_device;
struct v3_pci_bar {
-
+/* per-core state */
struct guest_info {
uint64_t rip;
-
+/* shared state across cores */
struct v3_vm_info {
v3_vm_class_t vm_class;
unsigned int (*get_cpu)(void);
void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
- void (*start_thread_on_cpu)(int logical_cpu, int (*fn)(void * arg), void * arg, char * thread_name);
+ void * (*start_thread_on_cpu)(int logical_cpu, int (*fn)(void * arg), void * arg, char * thread_name);
};
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2010, Peter Dinda <pdinda@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@cs.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_MPTABLE_H__
+#define __VMM_MPTABLE_H__
+
+/*
+ This module is responsible for injecting an appropriate description of
+ the multicore guest into the guest's memory in the form
+ of an Intel Multiprocessor Specification-compatible MP table.
+
+ The guest BIOS must cooperate in having preallocated space for the table
+*/
+
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_mem.h>
+#include <palacios/vmm_types.h>
+
+// Note that this must be run *after* the rombios has been mapped in
+// AND the rombios needs to be COPIED in so that we can edit it
+int v3_inject_mptable(struct v3_vm_info *vm);
+
+#endif
typedef enum {VM_RUNNING, VM_STOPPED, VM_SUSPENDED, VM_ERROR, VM_EMULATING} v3_vm_operating_mode_t;
-typedef enum {REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t;
+typedef enum {INIT, SIPI, REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t;
typedef enum {PHYSICAL_MEM, VIRTUAL_MEM} v3_mem_mode_t;
help
The ICC Bus for APIC/IOAPIC communication
+config DEBUG_ICC_BUS
+ bool "ICC BUS Debugging"
+ default n
+ depends on ICC_BUS && DEBUG_ON
+ help
+ Enable debugging for the ICC BUS
+
config BOCHS_DEBUG
bool "Bochs Debug Console Device"
static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data);
static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data);
-static void init_apic_state(struct apic_state * apic) {
+static void init_apic_state(struct apic_state * apic, uint32_t id, struct vm_device * icc) {
apic->base_addr = DEFAULT_BASE_ADDR;
apic->base_addr_msr.value = 0x0000000000000900LL;
apic->base_addr_msr.value |= ((uint64_t)DEFAULT_BASE_ADDR);
- PrintDebug("Sizeof Interrupt Request Register %d, should be 32\n",
+ PrintDebug("apic %u: Sizeof Interrupt Request Register %d, should be 32\n", apic->lapic_id.val,
(uint_t)sizeof(apic->int_req_reg));
memset(apic->int_req_reg, 0, sizeof(apic->int_req_reg));
apic->tmr_init_cnt = 0x00000000;
apic->tmr_cur_cnt = 0x00000000;
- // TODO:
- // We need to figure out what the APIC ID is....
- apic->lapic_id.val = 0x00000000;
+ apic->lapic_id.val = id;
+
+ apic->icc_bus = icc;
// The P6 has 6 LVT entries, so we set the value to (6-1)...
apic->apic_ver.val = 0x80050010;
if (old_reg == NULL) {
// uh oh...
- PrintError("APIC Base address region does not exit...\n");
+ PrintError("apic %u: APIC Base address region does not exit...\n",apic->lapic_id.val);
return -1;
}
apic->base_addr = src.value;
if (v3_hook_full_mem(dev->vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, dev) == -1) {
- PrintError("Could not hook new APIC Base address\n");
+ PrintError("apic %u: Could not hook new APIC Base address\n",apic->lapic_id.val);
v3_unlock(apic->lock);
return -1;
}
uchar_t * en_location = apic->int_en_reg + major_offset;
uchar_t flag = 0x1 << minor_offset;
+
+#if 1
+
if (irq_num <= 15) {
- PrintError("Attempting to raise an invalid interrupt: %d\n", irq_num);
+ PrintError("apic %u: Attempting to raise an invalid interrupt: %d\n", apic->lapic_id.val,irq_num);
return -1;
}
- PrintDebug("Raising APIC IRQ %d\n", irq_num);
+#endif
+
+
+ PrintDebug("apic %u: Raising APIC IRQ %d\n", apic->lapic_id.val,irq_num);
if (*req_location & flag) {
//V3_Print("Interrupts coallescing\n");
if (*en_location & flag) {
*req_location |= flag;
} else {
- PrintDebug("Interrupt not enabled... %.2x\n", *en_location);
+ PrintDebug("apic %u: Interrupt not enabled... %.2x\n", apic->lapic_id.val, *en_location);
return 0;
}
masked = apic->err_vec_tbl.mask;
break;
default:
- PrintError("Invalid APIC interrupt type\n");
+ PrintError("apic %u: Invalid APIC interrupt type\n",apic->lapic_id.val);
return -1;
}
// interrupt is masked, don't send
if (masked == 1) {
- PrintDebug("Inerrupt is masked\n");
+ PrintDebug("apic %u: Inerrupt is masked\n",apic->lapic_id.val);
return 0;
}
//PrintDebug("Activating internal APIC IRQ %d\n", vec_num);
return activate_apic_irq(apic, vec_num);
} else {
- PrintError("Unhandled Delivery Mode\n");
+ PrintError("apic %u: Unhandled Delivery Mode\n",apic->lapic_id.val);
return -1;
}
}
static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data) {
- struct apic_state * apic = (struct apic_state *)priv_data;
+ struct apic_state * apics = (struct apic_state *)(priv_data);
+ struct apic_state * apic = &(apics[core->cpu_id]);
addr_t reg_addr = guest_addr - apic->base_addr;
struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
uint32_t val = 0;
- PrintDebug("Read apic address space (%p)\n",
+ PrintDebug("apic %u: Read apic address space (%p)\n",apic->lapic_id.val,
(void *)guest_addr);
if (msr->apic_enable == 0) {
- PrintError("Write to APIC address space with disabled APIC\n");
+ PrintError("apic %u: Write to APIC address space with disabled APIC\n",apic->lapic_id.val);
return -1;
}
case SEOI_OFFSET:
default:
- PrintError("Read from Unhandled APIC Register: %x\n", (uint32_t)reg_addr);
- return -1;
+ PrintError("apic %u: Read from Unhandled APIC Register: %x (getting zero)\n", apic->lapic_id.val, (uint32_t)reg_addr);
+ // return -1;
+ val=0;
}
*val_ptr = val;
} else {
- PrintError("Invalid apic read length (%d)\n", length);
+ PrintError("apic %u: Invalid apic read length (%d)\n", apic->lapic_id.val, length);
return -1;
}
- PrintDebug("Read finished (val=%x)\n", *(uint32_t *)dst);
+ PrintDebug("apic %u: Read finished (val=%x)\n", apic->lapic_id.val, *(uint32_t *)dst);
return length;
}
*
*/
static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data) {
- struct apic_state * apic = (struct apic_state *)priv_data;
+ struct apic_state * apics = (struct apic_state *)(priv_data);
+ struct apic_state * apic = &(apics[core->cpu_id]);
addr_t reg_addr = guest_addr - apic->base_addr;
struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
uint32_t op_val = *(uint32_t *)src;
- PrintDebug("Write to apic address space (%p) (val=%x)\n",
+ PrintDebug("apic %u: Write to address space (%p) (val=%x)\n",
+ apic->lapic_id.val,
(void *)guest_addr, *(uint32_t *)src);
if (msr->apic_enable == 0) {
- PrintError("Write to APIC address space with disabled APIC\n");
+ PrintError("apic %u: Write to APIC address space with disabled APIC\n",apic->lapic_id.val);
return -1;
}
if (length != 4) {
- PrintError("Invalid apic write length (%d)\n", length);
+ PrintError("apic %u: Invalid apic write length (%d)\n", apic->lapic_id.val, length);
return -1;
}
case PPR_OFFSET:
case EXT_APIC_FEATURE_OFFSET:
#if 1
- PrintError("Attempting to write to read only register %p (ignored)\n", (void *)reg_addr);
+ PrintError("apic %u: Attempting to write to read only register %p (ignored)\n", apic->lapic_id.val, (void *)reg_addr);
#else
- PrintError("Attempting to write to read only register %p (error)\n", (void *)reg_addr);
+ PrintError("apic %u: Attempting to write to read only register %p (error)\n", apic->lapic_id.val, (void *)reg_addr);
return -1;
#endif
break;
case INT_CMD_LO_OFFSET:
apic->int_cmd.lo = op_val;
// ICC???
- v3_icc_send_irq(apic->icc_bus, apic->int_cmd.dst, apic->int_cmd.val);
+ PrintDebug("apic %u: sending cmd 0x%llx to apic %u\n",apic->lapic_id.val,
+ apic->int_cmd.val, apic->int_cmd.dst);
+ v3_icc_send_ipi(apic->icc_bus, apic->lapic_id.val, apic->int_cmd.val);
break;
case INT_CMD_HI_OFFSET:
apic->int_cmd.hi = op_val;
case EXT_APIC_CMD_OFFSET:
case SEOI_OFFSET:
default:
- PrintError("Write to Unhandled APIC Register: %x\n", (uint32_t)reg_addr);
- return -1;
+ PrintError("apic %u: Write to Unhandled APIC Register: %x (ignored)\n", apic->lapic_id.val, (uint32_t)reg_addr);
+ // return -1;
}
- PrintDebug("Write finished\n");
+ PrintDebug("apic %u: Write finished\n",apic->lapic_id.val);
return length;
}
/* Timer Functions */
static void apic_update_time(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data) {
- struct apic_state * apic = (struct apic_state *)priv_data;
+ struct apic_state * apics = (struct apic_state *)(priv_data);
+ struct apic_state * apic = &(apics[info->cpu_id]);
// The 32 bit GCC runtime is a pile of shit
#ifdef __V3_64BIT__
uint64_t tmr_ticks = 0;
if ((apic->tmr_init_cnt == 0) ||
( (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_ONESHOT) &&
(apic->tmr_cur_cnt == 0))) {
- //PrintDebug("APIC timer not yet initialized\n");
+ //PrintDebug("apic %u: APIC timer not yet initialized\n",apic->lapic_id.val);
return;
}
shift_num = 7;
break;
default:
- PrintError("Invalid Timer Divider configuration\n");
+ PrintError("apic %u: Invalid Timer Divider configuration\n",apic->lapic_id.val);
return;
}
apic->tmr_cur_cnt = 0;
// raise irq
- PrintDebug("Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n",
+ PrintDebug("apic %u: Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n", apic->lapic_id.val,
apic->tmr_vec_tbl.tmr_mode, apic->tmr_init_cnt, shift_num);
if (apic_intr_pending(info, priv_data)) {
- PrintDebug("Overriding pending IRQ %d\n", apic_get_intr_number(info, priv_data));
+ PrintDebug("apic %u: Overriding pending IRQ %d\n", apic->lapic_id.val, apic_get_intr_number(info, priv_data));
}
if (activate_internal_irq(apic, APIC_TMR_INT) == -1) {
- PrintError("Could not raise Timer interrupt\n");
+ PrintError("apic %u: Could not raise Timer interrupt\n",apic->lapic_id.val);
}
if (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_PERIODIC) {
static int apic_free(struct vm_device * dev) {
+
+ /* TODO: This should crosscall to force an unhook on each CPU */
+
// struct apic_state * apic = (struct apic_state *)dev->private_data;
v3_unhook_msr(dev->vm, BASE_ADDR_MSR);
static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
- PrintDebug("Creating APIC\n");
+ PrintDebug("apic: creating an APIC for each core\n");
char * name = v3_cfg_val(cfg, "name");
- char * icc_name = v3_cfg_val(cfg,"irq_bus");
+ char * icc_name = v3_cfg_val(cfg,"bus");
struct vm_device * icc = v3_find_dev(vm, icc_name);
int i;
if (!icc) {
- PrintError("Cannot find ICC Bus (%s)\n", icc_name);
+ PrintError("apic: Cannot find ICC Bus (%s)\n", icc_name);
return -1;
}
// We allocate one apic per core
// APICs are accessed via index which correlates with the core's cpu_id
+ // APIC ids 0..num_cores-1 are the per-core local APICs; id num_cores is used by the (single) ioapic
struct apic_state * apic = (struct apic_state *)V3_Malloc(sizeof(struct apic_state) * vm->num_cores);
struct vm_device * dev = v3_allocate_device(name, &dev_ops, apic);
if (v3_attach_device(vm, dev) == -1) {
- PrintError("Could not attach device %s\n", name);
+ PrintError("apic: Could not attach device %s\n", name);
return -1;
}
for (i = 0; i < vm->num_cores; i++) {
struct guest_info * core = &(vm->cores[i]);
+ init_apic_state(&(apic[i]),i,icc);
+
v3_register_intr_controller(core, &intr_ops, &(apic[i]));
+
v3_add_timer(core, &timer_ops, &(apic[i]));
+
v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, &(apic[i]));
v3_icc_register_apic(core, icc, i, &icc_ops, &(apic[i]));
- init_apic_state(&(apic[i]));
}
#include <palacios/vmm_sprintf.h>
#include <palacios/vm_guest.h>
#include <devices/icc_bus.h>
+#include <devices/apic_regs.h>
+
#define MAX_APICS 256
+#ifndef CONFIG_DEBUG_ICC_BUS
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+/*
+ * Cross-call target used by the ICC bus when an interrupt is delivered to
+ * a core other than the sender.  The function itself only logs the core
+ * it was invoked for.
+ * NOTE(review): presumably the act of running it on the target CPU is
+ * what forces the guest exit -- confirm against V3_Call_On_CPU.
+ *
+ * p - opaque pointer to the target core's struct guest_info.
+ */
+void v3_force_exit(void *p) {
+    struct guest_info *target_core = p;
+
+    PrintDebug("core %u: Forced to exit!\n", target_core->cpu_id);
+}
struct ipi_thunk_data {
struct vm_device * target;
uint64_t val;
};
-struct int_cmd_reg {
- union {
- uint64_t val;
-
- struct {
- uint32_t lo;
- uint32_t hi;
- } __attribute__((packed));
-
- struct {
- uint_t vec : 8;
- uint_t msg_type : 3;
- uint_t dst_mode : 1;
- uint_t del_status : 1;
- uint_t rsvd1 : 1;
- uint_t lvl : 1;
- uint_t trig_mode : 1;
- uint_t rem_rd_status : 2;
- uint_t dst_shorthand : 2;
- uint64_t rsvd2 : 36;
- uint32_t dst : 8;
- } __attribute__((packed));
- } __attribute__((packed));
-} __attribute__((packed));
-
-
struct apic_data {
struct icc_bus_state {
struct apic_data apics[MAX_APICS];
+
+ uint32_t ioapic_id;
};
static struct v3_device_ops dev_ops = {
};
+/*
+ * Printable names for the ICR destination-shorthand field, indexed by the
+ * field's 2-bit value.  Only used for debug output; the table and the
+ * strings it points at are read-only.
+ */
+static const char * const shorthand_str[] = {
+    "(no shorthand)",
+    "(self)",
+    "(all)",
+    "(all-but-me)",
+};
+
+/*
+ * Printable names for the ICR delivery-mode field, indexed by the field's
+ * 3-bit value.  Only used for debug output; the table and the strings it
+ * points at are read-only.
+ */
+static const char * const deliverymode_str[] = {
+    "(fixed)",
+    "(lowest priority)",
+    "(SMI)",
+    "(reserved)",
+    "(NMI)",
+    "(INIT)",
+    "(Start Up)",
+    "(reserved)",
+};
-int v3_icc_send_irq(struct vm_device * icc_bus, uint8_t apic_num, uint32_t irq_num) {
- struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
- struct apic_data * apic = &(state->apics[apic_num]);
+/*
+ * Deliver one interrupt message to a single destination APIC.
+ *
+ * src_apic  - id of the sending APIC (or of the ioapic).
+ * dest_apic - ICC bus record of the destination APIC.
+ * icr       - the interrupt command: del_mode selects the action and vec
+ *             carries the vector (for SIPI, the 4KB start page number).
+ * state     - ICC bus state, used to recognise the ioapic as the sender.
+ *
+ * Fixed and lowest-priority messages are both raised directly on the
+ * destination.  INIT and SIPI drive the destination core's cpu_mode
+ * through INIT -> SIPI -> REAL; a message arriving in the wrong state is
+ * logged and ignored.
+ *
+ * Returns 0 on success (including ignored INIT/SIPI messages) and -1 for
+ * the unsupported delivery modes (SMI, NMI, reserved).
+ */
+static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cmd_reg *icr, struct icc_bus_state * state) {
+
+    switch (icr->del_mode) {
+
+        case 0:  //fixed
+        case 1:  // lowest priority
+            PrintDebug("icc_bus: delivering IRQ to core %u\n",dest_apic->core->cpu_id);
+            dest_apic->ops->raise_intr(dest_apic->core, icr->vec, dest_apic->priv_data);
+            if (src_apic!=state->ioapic_id && dest_apic->core->cpu_id != src_apic) {
+                // Assume core # is same as logical processor for now
+                // TODO FIX THIS FIX THIS
+                // THERE SHOULD BE:  guestapicid->virtualapicid map,
+                //                   cpu_id->logical processor map
+                //     host maintains logical proc->physical proc
+                PrintDebug("icc_bus: non-local core, forcing it to exit\n");
+                V3_Call_On_CPU(dest_apic->core->cpu_id,v3_force_exit,(void*)(dest_apic->core));
+                // TODO: do what the print says
+            }
+            break;
+
+        case 2:  //SMI
+            PrintError("icc_bus: SMI delivery is unsupported\n");
+            return -1;
+
+        case 3:  //reserved
+        case 7:
+            // Report the mode actually requested; both 3 and 7 land here
+            PrintError("icc_bus: Reserved delivery mode %u is unsupported\n", (uint32_t)icr->del_mode);
+            return -1;
+
+        case 4:  //NMI
+            PrintError("icc_bus: NMI delivery is unsupported\n");
+            return -1;
+
+        case 5: { //INIT
+            struct guest_info *core = dest_apic->core;
+
+            PrintDebug("icc_bus: INIT delivery to core %u\n",core->cpu_id);
+
+            // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
+
+            // Sanity check
+            if (core->cpu_mode!=INIT) {
+                PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id);
+                // Only a warning, since INIT INIT SIPI is common
+                break;
+            }
+
+            // We transition the target core to SIPI state
+            core->cpu_mode=SIPI;  // note: locking should not be needed here
+
+            // That should be it since the target core should be
+            // waiting in host on this transition
+            // either it's on another core or on a different preemptive thread
+            // in both cases, it will quickly notice this transition
+            // in particular, we should not need to force an exit here
+
+            PrintDebug("icc_bus: INIT delivery done\n");
+
+        }
+            break;
+
+        case 6: { //SIPI
+            struct guest_info *core = dest_apic->core;
+            uint64_t rip = icr->vec << 12;  // vector encodes target address;
+
+            PrintDebug("icc_bus: SIPI delivery (0x%x -> rip=0x%p) to core %u\n",
+                       icr->vec, (void*)rip, core->cpu_id);
+
+            // Sanity check
+            if (core->cpu_mode!=SIPI) {
+                PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id);
+                break;
+            }
+
+            // Write the RIP, CS, and descriptor
+            // assume the rest is already good to go
+            // (cs.base + rip adds back up to vec << 12)
+            core->rip=rip & 0xffff;
+            core->segments.cs.selector = (rip >> 4) & 0xf000;
+            core->segments.cs.limit= 0xffff;
+            core->segments.cs.base = rip & 0xf0000;
+
+            // Maybe need to adjust the APIC?
+
+            // We transition the target core out of SIPI state into REAL mode
+            core->cpu_mode=REAL;  // note: locking should not be needed here
+
+            // As with INIT, we should not need to do anything else
+
+            PrintDebug("icc_bus: SIPI delivery done\n");
+
+        }
+            break;
+    }
- struct int_cmd_reg icr;
- icr.lo = irq_num;
+    return 0;
+}
- char * type = NULL;
- char * dest = NULL;
- char foo[8];
- switch (icr.dst_shorthand) {
- case 0x0:
- sprintf(foo, "%d", icr.dst);
- dest = foo;
- break;
- case 0x1:
- dest = "(self)";
- break;
- case 0x2:
- dest = "(broadcast inclusive)";
- break;
- case 0x3:
- dest = "(broadcast)";
- break;
- }
+/*
+ * Route an interrupt message over the ICC bus.
+ *
+ * icc_bus  - the ICC bus device.
+ * src_apic - id of the sender: a registered local APIC or the ioapic.
+ * icr_data - the message, laid out like a local APIC ICR (struct int_cmd_reg).
+ *
+ * The destination shorthand selects the targets: none (the APIC indexed
+ * by the ICR destination field), self (the sender), all, or all but the
+ * sender.  Each target is handed to deliver().
+ *
+ * Returns 0 on success, -1 if the sender or an explicitly addressed
+ * destination is not registered, if the ioapic tries to send to itself,
+ * or if any delivery fails.
+ *
+ * NOTE(review): a logical-mode destination (dst_mode != 0) is still used
+ * as a plain index into the APIC table and is not checked for presence.
+ */
+int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data) {
- switch (icr.msg_type) {
- case 0x0:
- type = "";
- break;
- case 0x4:
- type = "(NMI)";
- break;
- case 0x5:
- type = "(INIT)";
- break;
- case 0x6:
- type = "(Startup)";
- break;
+    PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx\n",icc_bus,src_apic,icr_data);
+
+    struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
+    struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
+
+    // initial sanity checks
+    if (src_apic>=MAX_APICS || (!state->apics[src_apic].present && src_apic!=state->ioapic_id)) {
+        PrintError("icc_bus: Apparently sending from unregistered apic id=%u\n",src_apic);
+        return -1;
+    }
+    // The destination field only names a target when no shorthand is used
+    if (icr->dst_shorthand==0 && icr->dst_mode==0 && !state->apics[icr->dst].present) {
+        PrintError("icc_bus: Attempted send to unregistered apic id=%u\n",icr->dst);
+        return -1;
}
+
+    struct apic_data * dest_apic =  &(state->apics[icr->dst]);
+
+
+    PrintDebug("icc_bus: IPI %s %u from %s %u to %s %u (icr=0x%llx)\n",
+               deliverymode_str[icr->del_mode], icr->vec, src_apic==state->ioapic_id ? "ioapic" : "apic",
+               src_apic, shorthand_str[icr->dst_shorthand], icr->dst,icr->val);
- PrintDebug("Sending IPI of type %s and destination type %s from LAPIC %u to LAPIC %u.\n",
- type, dest, V3_Get_CPU(), apic_num);
- apic->ops->raise_intr(apic->core, irq_num & 0xff, apic->priv_data);
- //V3_Call_On_CPU(apic_num, icc_force_exit, (void *)(uint64_t)(val & 0xff));
+    switch (icr->dst_shorthand) {
+
+        case 0:  // no shorthand
+            if (deliver(src_apic,dest_apic,icr,state)) {
+                return -1;
+            }
+            break;
+
+        case 1:  // self
+            // The sender is the one and only destination; the ICR
+            // destination field is not used for this shorthand.
+            dest_apic = &(state->apics[src_apic]);
+            if (!dest_apic->present) {
+                // A sender that passed the check above without being a
+                // registered local APIC can only be the ioapic.
+                PrintError("icc_bus: ioapic attempting to send to itself\n");
+                return -1;
+            }
+            if (deliver(src_apic,dest_apic,icr,state)) {
+                return -1;
+            }
+            break;
+
+        case 2:
+        case 3: { // all and all-but-me
+            int i;
+            for (i=0;i<MAX_APICS;i++) {
+                dest_apic=&(state->apics[i]);
+                if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) {
+                    if (deliver(src_apic,dest_apic,icr,state)) {
+                        return -1;
+                    }
+                }
+            }
+        }
+            break;
+    }
return 0;
}
struct apic_data * apic = &(icc->apics[apic_num]);
if (apic->present == 1) {
- PrintError("Attempt to re-register apic %u\n", apic_num);
+ PrintError("icc_bus: Attempt to re-register apic %u\n", apic_num);
return -1;
}
apic->core = core;
apic->ops = ops;
- PrintDebug("Registered apic%u\n", apic_num);
+ PrintDebug("icc_bus: Registered apic %u\n", apic_num);
return 0;
}
+/*
+ * Record the id of the ioapic attached to the ICC bus.
+ *
+ * vm       - the VM the ioapic belongs to (not used here).
+ * icc_bus  - the ICC bus device.
+ * apic_num - the id the ioapic sends from.
+ *
+ * Only one ioapic is supported.  A stored id of 0 is treated as "none
+ * registered yet", so any non-zero stored id makes this call fail.
+ *
+ * Returns 0 on success, -1 if an ioapic id is already recorded.
+ */
+int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, uint8_t apic_num)
+{
+    struct icc_bus_state * bus = (struct icc_bus_state *)icc_bus->private_data;
+
+    if (bus->ioapic_id != 0) {
+        PrintError("icc_bus: Attempt to register a second ioapic!\n");
+        return -1;
+    }
+
+    bus->ioapic_id = apic_num;
+    PrintDebug("icc_bus: Registered ioapic %u\n", apic_num);
+
+    return 0;
+}
+
static int icc_bus_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
- PrintDebug("Creating ICC_BUS\n");
+ PrintDebug("icc_bus: Creating ICC_BUS\n");
char * name = v3_cfg_val(cfg, "name");
struct vm_device * dev = v3_allocate_device(name, &dev_ops, icc_bus);
if (v3_attach_device(vm, dev) == -1) {
- PrintError("Could not attach device %s\n", name);
+ PrintError("icc_bus: Could not attach device %s\n", name);
return -1;
}
#include <palacios/vmm.h>
#include <palacios/vmm_dev_mgr.h>
#include <devices/icc_bus.h>
+#include <devices/apic_regs.h>
#include <palacios/vm_guest.h>
#ifndef CONFIG_DEBUG_IO_APIC
};
-static void init_ioapic_state(struct io_apic_state * ioapic) {
+static void init_ioapic_state(struct io_apic_state * ioapic, uint32_t id) {
int i = 0;
ioapic->base_addr = IO_APIC_BASE_ADDR;
ioapic->index_reg = 0;
- ioapic->ioapic_id.val = 0x00000000;
+ ioapic->ioapic_id.val = id;
ioapic->ioapic_ver.val = 0x00170011;
ioapic->ioapic_arb_id.val = 0x00000000;
uint32_t reg_tgt = guest_addr - ioapic->base_addr;
uint32_t * op_val = (uint32_t *)dst;
- PrintDebug("IOAPIC Read at %p\n", (void *)guest_addr);
+ PrintDebug("ioapic %u: IOAPIC Read at %p\n", ioapic->ioapic_id.val, (void *)guest_addr);
if (reg_tgt == 0x00) {
*op_val = ioapic->index_reg;
uint_t hi_val = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) % 1;
if (redir_index > 0x3f) {
- PrintError("Invalid redirection table entry %x\n", (uint32_t)redir_index);
+ PrintError("ioapic %u: Invalid redirection table entry %x\n", ioapic->ioapic_id.val, (uint32_t)redir_index);
return -1;
}
if (hi_val) {
uint32_t reg_tgt = guest_addr - ioapic->base_addr;
uint32_t op_val = *(uint32_t *)src;
- PrintDebug("IOAPIC Write at %p (val = %d)\n", (void *)guest_addr, *(uint32_t *)src);
+ PrintDebug("ioapic %u: IOAPIC Write at %p (val = %d)\n", ioapic->ioapic_id.val, (void *)guest_addr, *(uint32_t *)src);
if (reg_tgt == 0x00) {
ioapic->index_reg = op_val;
break;
case IOAPIC_VER_REG:
// GPF/PageFault/Ignore?
- PrintError("Writing to read only IOAPIC register\n");
+ PrintError("ioapic %u: Writing to read only IOAPIC register\n", ioapic->ioapic_id.val);
return -1;
case IOAPIC_ARB_REG:
ioapic->ioapic_arb_id.val = op_val;
if (redir_index > 0x3f) {
- PrintError("Invalid redirection table entry %x\n", (uint32_t)redir_index);
+ PrintError("ioapic %u: Invalid redirection table entry %x\n", ioapic->ioapic_id.val, (uint32_t)redir_index);
return -1;
}
if (hi_val) {
- PrintDebug("Writing to hi of pin %d\n", redir_index);
+ PrintDebug("ioapic %u: Writing to hi of pin %d\n", ioapic->ioapic_id.val, redir_index);
ioapic->redir_tbl[redir_index].hi = op_val;
} else {
- PrintDebug("Writing to lo of pin %d\n", redir_index);
+ PrintDebug("ioapic %u: Writing to lo of pin %d\n", ioapic->ioapic_id.val, redir_index);
op_val &= REDIR_LO_MASK;
ioapic->redir_tbl[redir_index].lo &= ~REDIR_LO_MASK;
ioapic->redir_tbl[redir_index].lo |= op_val;
struct redir_tbl_entry * irq_entry = NULL;
if (irq > 24) {
- PrintDebug("IRQ out of range of IO APIC\n");
+ PrintDebug("ioapic %u: IRQ out of range of IO APIC\n", ioapic->ioapic_id.val);
return -1;
}
irq_entry = &(ioapic->redir_tbl[irq]);
if (irq_entry->mask == 0) {
- PrintDebug("IOAPIC Signalling APIC to raise INTR %d\n", irq_entry->vec);
- v3_icc_send_irq(ioapic->icc_bus, irq_entry->dst_field, irq_entry->vec);
+ PrintDebug("ioapic %u: IOAPIC Signalling APIC to raise INTR %d\n", ioapic->ioapic_id.val, irq_entry->vec);
+
+ // the format of the redirection table entry is just slightly
+ // different than that of the lapic's cmd register, which is the other
+ // way an IPI is initiated. So we will translate
+ //
+ struct int_cmd_reg icr;
+
+ icr.val = irq_entry->val;
+ icr.rsvd1=0;
+ icr.lvl=1;
+ icr.trig_mode=irq_entry->trig_mode;
+ icr.rem_rd_status=0;
+ icr.dst_shorthand=0; // no shorthand
+ icr.rsvd2=0;
+
+ v3_icc_send_ipi(ioapic->icc_bus, ioapic->ioapic_id.val,icr.val);
}
return 0;
static int ioapic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
- struct vm_device * icc_bus = v3_find_dev(vm, v3_cfg_val(cfg, "irq_bus"));
+ struct vm_device * icc_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
char * name = v3_cfg_val(cfg, "name");
if (!icc_bus) {
- PrintError("Could not locate ICC BUS device (%s)\n", v3_cfg_val(cfg, "irq_bus"));
+ PrintError("ioapic: Could not locate ICC BUS device (%s)\n", v3_cfg_val(cfg, "bus"));
return -1;
}
- PrintDebug("Creating IO APIC\n");
+ PrintDebug("ioapic: Creating IO APIC\n");
struct io_apic_state * ioapic = (struct io_apic_state *)V3_Malloc(sizeof(struct io_apic_state));
if (v3_attach_device(vm, dev) == -1) {
- PrintError("Could not attach device %s\n", name);
+ PrintError("ioapic: Could not attach device %s\n", name);
return -1;
}
v3_register_intr_router(vm, &router_ops, dev);
- init_ioapic_state(ioapic);
+
+ init_ioapic_state(ioapic,vm->num_cores);
+
+ v3_icc_register_ioapic(vm,icc_bus,ioapic->ioapic_id.val);
v3_hook_full_mem(vm, V3_MEM_CORE_ANY, ioapic->base_addr, ioapic->base_addr + PAGE_SIZE_4KB,
ioapic_read, ioapic_write, dev);
PrintDebug("Virtio NIC: Virtio Pkt Sending, net_state: %p, pkt size: %d\n", virtio, len);
- if (guest_pa_to_host_va(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
+ if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
PrintError("Could not translate buffer address\n");
return -1;
}
uint32_t len;
uint8_t * desc_buf = NULL;
- if (guest_pa_to_host_va(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) {
+ if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) {
PrintError("Could not translate buffer address\n");
return -1;
}
int i = 0;
hdr_desc = &(q->desc[desc_idx]);
- if (guest_pa_to_host_va(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
+ if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
PrintError("Could not translate block header address\n");
return -1;
}
// round up to next page boundary.
queue->ring_used_addr = (queue->ring_used_addr + 0xfff) & ~0xfff;
- if (guest_pa_to_host_va(core, queue->ring_desc_addr, (addr_t *)&(queue->desc)) == -1) {
+ if (v3_gpa_to_hva(core, queue->ring_desc_addr, (addr_t *)&(queue->desc)) == -1) {
PrintError("Could not translate ring descriptor address\n");
return -1;
}
- if (guest_pa_to_host_va(core, queue->ring_avail_addr, (addr_t *)&(queue->avail)) == -1) {
+ if (v3_gpa_to_hva(core, queue->ring_avail_addr, (addr_t *)&(queue->avail)) == -1) {
PrintError("Could not translate ring available address\n");
return -1;
}
- if (guest_pa_to_host_va(core, queue->ring_used_addr, (addr_t *)&(queue->used)) == -1) {
+ if (v3_gpa_to_hva(core, queue->ring_used_addr, (addr_t *)&(queue->used)) == -1) {
PrintError("Could not translate ring used address\n");
return -1;
}
struct vring_desc * hdr_desc = NULL;
hdr_desc = &(q->desc[hdr_idx]);
- if (guest_pa_to_host_va(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
+ if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
PrintError("Could not translate receive buffer address\n");
ret_val = -1;
goto exit;
*/
#include <devices/pci.h>
-#include <devices/ne2k.h>
+#include <config/ne2k.h>
#include <palacios/vmm.h>
#include <palacios/vmm_types.h>
#include <palacios/vmm_io.h>
#include <palacios/vmm_debug.h>
#include <palacios/vmm_string.h>
+#include <palacios/vmm_dev_mgr.h>
+#include <palacios/vmm_intr.h>
#ifndef CONFIG_DEBUG_NE2K
#undef PrintDebug
static int ne2k_update_irq(struct vm_device *dev) {
- struct ne2k_context * nic_state = (struct ne2k_context *)dev->private_data;
+ struct ne2k_context * nic_state = (struct ne2k_context *)(dev->private_data);
struct pci_device * pci_dev = nic_state->pci_dev;
int irq_line = 0;
PrintDebug("Ne2k: Device %p is not attached to any PCI Bus\n", nic_state);
irq_line = NE2K_DEFAULT_IRQ;
} else {
- irq_line = pdev->config_header.intr_line;
+ irq_line = pci_dev->config_header.intr_line;
}
if (irq_line == 0){
// The top bit of the ISR/IMR is reserved and does not indicate and irq event
// We mask the bit out of the irq pending check
if ((nic_state->isr.val & nic_state->imr.val) & 0x7f) {
- v3_raise_irq(nic_state->vm, irq_line);
+ v3_raise_virq(nic_state->vm, irq_line);
PrintDebug("Ne2k: RaiseIrq: isr: 0x%02x imr: 0x%02x\n", nic_state->isr.val, nic_state->imr.val);
}
nic_state->vm = dev->vm;
- nic_state->isr.reset = 1;
+ nic_state->isr.reset_status = 1;
nic_state->imr.val = 0x00;
nic_state->cmd.val = 0x22;
p = nic_state->mem + index;
nic_state->rsr.val = 0;
- nic_state->rsr.rx_pkt_ok = 1;
+ nic_state->rsr.pkt_rx_ok = 1;
if (pkt[0] & 0x01) {
nic_state->rsr.phy = 1;
nic_state->cmd.val = *(uint8_t *)src;
if (!(nic_state->cmd.stop)) {
- nic_state->isr.reset = 0;
+ nic_state->isr.reset_status = 0;
// if ((send pkt) && (dma byte count == 0))
vmm_xml.o \
vmm_muxer.o \
vmm_mem_hook.o \
+ vmm_mptable.o \
obj-$(CONFIG_SVM) += svm.o \
svm_io.o \
// vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
+ PrintDebug("Starting SVM core %u\n",info->cpu_id);
+ if (info->cpu_mode==INIT) {
+ PrintDebug("SVM core %u: I am an AP in INIT mode, waiting for that to change\n",info->cpu_id);
+ while (info->cpu_mode==INIT) {
+ v3_yield(info);
+ //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
+ }
+ PrintDebug("SVM core %u: I am out of INIT\n",info->cpu_id);
+ if (info->cpu_mode==SIPI) {
+ PrintDebug("SVM core %u: I am waiting on a SIPI to set my starting address\n",info->cpu_id);
+ while (info->cpu_mode==SIPI) {
+ v3_yield(info);
+ //PrintDebug("SVM core %u: still waiting for SIPI\n",info->cpu_id);
+ }
+ }
+ PrintDebug("SVM core %u: I have my SIPI\n", info->cpu_id);
+ }
+
+ if (info->cpu_mode!=REAL) {
+ PrintError("SVM core %u: I am not in REAL mode at launch! Huh?!\n", info->cpu_id);
+ return -1;
+ }
+
+ PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
+ info->cpu_id, info->segments.cs.selector, (void*)(info->segments.cs.base),
+ info->segments.cs.limit,(void*)(info->rip));
+
- PrintDebug("Launching SVM VM (vmcb=%p)\n", (void *)info->vmm_data);
+
+ PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
//PrintDebugVMCB((vmcb_t*)(info->vmm_data));
info->vm_info->run_state = VM_RUNNING;
info->vm_info->run_state = VM_ERROR;
- V3_Print("SVM ERROR!!\n");
+ V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id);
v3_print_guest_state(info);
- V3_Print("SVM Exit Code: %p\n", (void *)(addr_t)guest_ctrl->exit_code);
+ V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code);
- V3_Print("exit_info1 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info1));
- V3_Print("exit_info1 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
+ V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
+ V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
- V3_Print("exit_info2 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info2));
- V3_Print("exit_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
+ V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
+ V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
v3_gva_to_hva(info, linear_addr, &host_addr);
}
- V3_Print("Host Address of rip = 0x%p\n", (void *)host_addr);
+ V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
- V3_Print("Instr (15 bytes) at %p:\n", (void *)host_addr);
+ V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
v3_dump_mem((uint8_t *)host_addr, 15);
v3_print_stack(info);
*/
}
+
+ // Need to take down the other cores on error...
+
return 0;
}
if (reg->flags.alloced == 0) {
PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n",
(void *)gpa);
+ v3_print_mem_map(info->vm_info);
return -1;
}
}
-int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
- int i = 0;
- V3_Print("V3 -- Starting VM\n");
+static int start_core(void *p) // thread entry point handed to start_thread_on_cpu; p is the struct guest_info of the core to run
+{
+ struct guest_info * info = (struct guest_info*)p;
- for (i = 0; i < vm->num_cores; i++) {
- struct guest_info * info = &(vm->cores[i]);
+ PrintDebug("core %u: in start_core\n",info->cpu_id);
+
+ // We assume here that the APs are in INIT mode
+ // and only the BSP is in REAL mode.
+ // The per-architecture start routines called below
+ // rely on this assumption.
- /* GRUESOM HACK... */
- // vm->cpu_id = v3_get_cpu_id();
- switch (v3_cpu_types[info->cpu_id]) {
+ switch (v3_cpu_types[info->cpu_id]) { // dispatch on the CPU type recorded for this core's cpu_id
#ifdef CONFIG_SVM
- case V3_SVM_CPU:
- case V3_SVM_REV3_CPU:
- return v3_start_svm_guest(info);
- break;
+ case V3_SVM_CPU:
+ case V3_SVM_REV3_CPU:
+ return v3_start_svm_guest(info);
+ break;
#endif
#if CONFIG_VMX
- case V3_VMX_CPU:
- case V3_VMX_EPT_CPU:
- return v3_start_vmx_guest(info);
- break;
+ case V3_VMX_CPU:
+ case V3_VMX_EPT_CPU:
+ return v3_start_vmx_guest(info);
+ break;
#endif
- default:
- PrintError("Attemping to enter a guest on an invalid CPU\n");
- return -1;
+ default:
+ PrintError("Attemping to enter a guest on an invalid CPU\n");
+ return -1; // no supported virtualization extension for this CPU type
+ }
+ // not reached: every case of the switch above returns
+ return 0;
+}
+
+
+/*
+ * Pick the next host processor to run a virtual core on.
+ *
+ * Starting at the processor after last_proc, scan the low 32 bits of
+ * cpu_mask round-robin (wrapping from 31 back to 0) and return the first
+ * processor whose bit is set.  The search is bounded to one full pass over
+ * the mask; if no bit is set at all, processor 0 is returned as a fallback
+ * (the caller already assumes it is running there) instead of spinning
+ * forever.
+ */
+static uint32_t get_next_core(unsigned int cpu_mask, uint32_t last_proc)
+{
+    uint32_t tries;
+    uint32_t proc_to_use;
+
+    PrintDebug("In get_next_core cpu_mask=0x%x last_proc=%u\n",cpu_mask,last_proc);
+
+    proc_to_use = (last_proc + 1) % 32; // only 32 procs
+
+    // Probe each of the 32 candidates at most once
+    for (tries = 0; tries < 32; tries++) {
+        if ((cpu_mask >> proc_to_use) & 0x1) {
+            return proc_to_use;
+        }
+        proc_to_use = (proc_to_use + 1) % 32;
+    }
+
+    PrintError("get_next_core: cpu_mask=0x%x selects no processor, falling back to processor 0\n", cpu_mask);
+    return 0;
+}
+
+/*
+ * Launch every virtual core of vm on a host thread.
+ *
+ * Cores 1..n-1 (the APs) are started first, each on the next host
+ * processor selected from cpu_mask by get_next_core(); the BSP (core 0)
+ * is started last on host processor 0.  All cores are launched through
+ * the host's start_thread_on_cpu hook, so the hook is checked once up
+ * front; this also covers single-core VMs, where the AP loop never runs.
+ *
+ * Returns 0 once all threads have been launched, -1 if the hook is
+ * missing or any launch fails.
+ */
+int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
+    uint32_t i;
+    uint32_t last_proc;
+    uint32_t proc_to_use;
+    char tname[16];
+
+    V3_Print("V3 -- Starting VM (%u cores)\n",vm->num_cores);
+
+    if (!os_hooks->start_thread_on_cpu) {
+        PrintError("Host OS does not support start_thread_on_cpu - FAILING\n");
+        return -1;
}
+
+    // We assume that we are running on CPU 0 of the underlying system
+    last_proc=0;
+
+    // We will fork off cores 1..n first, then boot core zero
+
+    // for the AP, we need to create threads
+
+    for (i = 1; i < vm->num_cores; i++) {
+        proc_to_use=get_next_core(cpu_mask,last_proc);
+        last_proc=proc_to_use;
+
+        PrintDebug("Starting virtual core %u on logical core %u\n",i,proc_to_use);
+
+        sprintf(tname,"core%u",i);
+
+        PrintDebug("run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
+                   proc_to_use, start_core, &(vm->cores[i]), tname);
+
+        // TODO: actually manage these threads instead of just launching them
+        if (!(os_hooks->start_thread_on_cpu(proc_to_use,start_core,&(vm->cores[i]),tname))) {
+            PrintError("Thread launch failed\n");
+            return -1;
+        }
+    }
+
+    // Finally launch the BSP on core 0
+    sprintf(tname,"core%u",0);
+    if (!os_hooks->start_thread_on_cpu(0,start_core,&(vm->cores[0]),tname)) {
+        PrintError("Thread launch failed\n");
+        return -1;
}
return 0;
+
}
#include <palacios/vmm_xml.h>
#include <palacios/vmm_io.h>
#include <palacios/vmm_msr.h>
-
+#include <palacios/vmm_mptable.h>
return -1;
}
+ if (v3_inject_mptable(vm)==-1) {
+ PrintError("Failed to inject mptable during configuration\n");
+ return -1;
+ }
+
return 0;
}
for (i = 0; i < vm->num_cores; i++) {
struct guest_info * info = &(vm->cores[i]);
-
info->cpu_id = i;
info->vm_info = vm;
static int pre_config_pc_core(struct guest_info * info, v3_cfg_tree_t * cfg) {
+ if (info->cpu_id!=0) {
+ // I am an AP, so I will start in INIT mode,
+ // not in real mode. This means I will wait for
+ // an INIT and then for a SIPI. The SIPI will
+ // tell me where to start executing in real mode
+ info->cpu_mode = INIT;
+ } else {
+ // I am the BSP, so I will start as normal
+ info->cpu_mode = REAL;
+ }
- info->cpu_mode = REAL;
info->mem_mode = PHYSICAL_MEM;
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2010, Peter Dinda <pdinda@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@cs.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_mptable.h>
+#include <palacios/vmm_string.h>
+#include <palacios/vm_guest_mem.h>
+
+/*
+ The guest bios is compiled with blank space for an MP table
+ at a default address. A cookie value is temporarily placed
+ there so we can verify it exists. If it does, we overwrite
+ the MP table based on the configuration we are given in the
+ guest info.
+
+ Currently, we set up n identical processors (based on
+ number of cores in guest info), with apics 0..n-1, and
+ ioapic as n.
+
+ The expectation is that the target will have
+ 8 bytes (for ___HVMMP signature) followed by 896 bytes of space
+ for a total of 904 bytes of space.
+ We write the floating pointer at target (16 bytes),
+ immediately followed by the mp config header, followed by
+ the entries.
+
+*/
+
+#define BIOS_MP_TABLE_DEFAULT_LOCATION 0xfcc00 // guest physical (linear)
+#define BIOS_MP_TABLE_COOKIE "___HVMMP"
+#define BIOS_MP_TABLE_COOKIE_LEN 8
+
+#define POINTER_SIGNATURE "_MP_"
+#define HEADER_SIGNATURE "PCMP"
+
+#define SPEC_REV ((uchar_t)0x4)
+#define OEM_ID "V3VEE "
+#define PROD_ID "PALACIOS 1.3 "
+
+#define LAPIC_ADDR 0xfee00000
+#define LAPIC_VERSION 0x11
+
+#define ENTRY_PROC 0
+#define ENTRY_BUS 1
+#define ENTRY_IOAPIC 2
+#define ENTRY_IOINT 3
+#define ENTRY_LOINT 4
+
+#define IOAPIC_ADDR 0xfec00000
+#define IOAPIC_VERSION 0x11
+
+// These are bochs defaults - should really come from cpuid of machine
+#define PROC_FAMILY 0x6
+#define PROC_STEPPING 0x0
+#define PROC_MODEL 0x0
+#define PROC_FEATURE_FLAGS 0x00000201
+
+
+#define BUS_ISA "ISA "
+
+#define INT_TYPE_INT 0
+#define INT_TYPE_NMI 1
+#define INT_TYPE_SMI 2
+#define INT_TYPE_EXT 3
+
+#define INT_POLARITY_DEFAULT 0
+#define INT_POLARITY_ACTIVE_HIGH 1
+#define INT_POLARITY_RESERVED 2
+#define INT_POLARITY_ACTIVE_LOW 3
+
+#define INT_TRIGGER_DEFAULT 0
+#define INT_TRIGGER_EDGE 1
+#define INT_TRIGGER_RESERVED 2
+#define INT_TRIGGER_LEVEL 3
+
+
+
+
+// MP floating pointer structure (16 bytes, packed); it points to the mp table header
+struct mp_floating_pointer {
+ uint32_t signature; // "_MP_" (POINTER_SIGNATURE)
+ uint32_t pointer; // gpa of the mp table header; v3_inject_mptable places the header directly after this structure
+ uint8_t length; // length of this structure in 16 byte chunks (paragraphs); write_pointer sets it to 1
+ uint8_t spec_rev; // 0x4 (SPEC_REV)
+ uint8_t checksum; // chosen so that all bytes of this structure sum to zero (mod 256)
+ uint8_t mp_featurebyte[5]; // zero out to indicate mp config table
+ // first byte nonzero => default configurations (see spec)
+ // second byte, bit 7 (top bit) = IMCR if set, virtual wire if zero
+} __attribute__((packed));
+
+
+struct mp_table_header { // header of the MP configuration table; the table entries follow it directly in memory
+ uint32_t signature; // "PCMP"
+ uint16_t base_table_length; // bytes, starting from header (header plus all entries)
+ uint8_t spec_rev; // specification revision (0x4 is the current rev)
+ uint8_t checksum; // sum of all bytes, including checksum, must be zero
+ uint8_t oem_id[8]; // OEM ID "V3VEE "
+ uint8_t prod_id[12]; // Product ID "PALACIOS 1.3"
+ uint32_t oem_table_ptr; // oem table, if used (zeroed)
+ uint16_t oem_table_size; // oem table length, if used
+ uint16_t entry_count; // number of entries in this table
+ uint32_t lapic_addr; // apic address on all processors
+ uint16_t extended_table_length; // zero by default
+ uint8_t extended_table_checksum; // zero by default
+ uint8_t reserved; // zero by default
+ // this is followed by entries of the various types indicated below
+} __attribute__((packed));
+
+struct mp_table_processor { // processor entry; write_mptable emits one per core
+ uint8_t entry_type; // type 0 (ENTRY_PROC)
+ uint8_t lapic_id; // 0.. (write_mptable uses the core number)
+ uint8_t lapic_version; // LAPIC_VERSION
+ union {
+ uint8_t data; // all flag bits as one byte
+ struct {
+ uint8_t en:1; // 1=processor enabled
+ uint8_t bp:1; // 1=bootstrap processor
+ uint8_t reserved:6;
+ } fields;
+ } cpu_flags;
+ union {
+ uint32_t data; // whole signature as one 32 bit value
+ struct {
+ uint8_t stepping:4; // PROC_STEPPING
+ uint8_t model:4; // PROC_MODEL
+ uint8_t family:4; // PROC_FAMILY
+ uint32_t rest:20; // remaining bits, left zero by write_mptable
+ } fields;
+ } cpu_signature;
+ uint32_t cpu_feature_flags; // result of CPUID (currently the fixed PROC_FEATURE_FLAGS value)
+ uint32_t reserved[2];
+} __attribute__((packed));
+
+struct mp_table_bus { // bus entry; write_mptable emits a single one for the ISA bus
+ uint8_t entry_type; // type 1 (ENTRY_BUS)
+ uint8_t bus_id; // 0..
+ uint8_t bus_type[6]; // "PCI" "INTERN", etc; fixed 6 byte field, not NUL terminated
+} __attribute__((packed));
+
+
+struct mp_table_ioapic { // I/O APIC entry; write_mptable emits a single one
+ uint8_t entry_type; // type 2 (ENTRY_IOAPIC)
+ uint8_t ioapic_id; // 0.. (write_mptable uses numcores, i.e. the first id after the lapics)
+ uint8_t ioapic_version; // bits 0..7 of the version register
+ union {
+ uint8_t data; // all flag bits as one byte
+ struct {
+ uint8_t en:1; // 1=ioapic enabled
+ uint8_t reserved:7;
+ } fields;
+ } ioapic_flags;
+ uint32_t ioapic_address; // physical address (same for all procs)
+} __attribute__((packed));
+
+
+struct mp_table_io_interrupt_assignment { // I/O interrupt entry: routes one bus IRQ to one ioapic input pin
+ uint8_t entry_type; // type 3 (ENTRY_IOINT)
+ uint8_t interrupt_type; // 0=int, 1=nmi, 2=smi, 3=ExtInt(8259)
+ union {
+ uint16_t data; // all flag bits as one 16 bit value
+ struct {
+ uint8_t po:2; // polarity (00=default for bus, 01=active high, 10=reserved, 11=active low)
+ uint8_t el:2; // trigger mode (00=default for bus, 01=edge, 10=reserved, 11=level)
+ uint16_t reserved:12;
+ } fields;
+ } io_interrupt_flags;
+ uint8_t source_bus_id; // bus_id of the bus entry the interrupt comes from
+ uint8_t source_bus_irq; // IRQ number on that bus
+ uint8_t dest_ioapic_id; // ioapic_id of the ioapic entry that receives the interrupt
+ uint8_t dest_ioapic_intn; // input pin on that ioapic
+} __attribute__((packed));
+
+
+struct mp_table_local_interrupt_assignment { // local interrupt entry; declared here but not emitted by write_mptable
+ uint8_t entry_type; // type 4 (ENTRY_LOINT)
+ uint8_t interrupt_type; // 0=int, 1=nmi, 2=smi, 3=ExtInt(8259)
+ union {
+ uint16_t data; // all flag bits as one 16 bit value
+ struct {
+ uint8_t po:2; // polarity (00=default for bus, 01=active high, 10=reserved, 11=active low)
+ uint8_t el:2; // trigger mode (00=default for bus, 01=edge, 10=reserved, 11=level)
+ uint16_t reserved:12;
+ } fields;
+ } io_interrupt_flags;
+ uint8_t source_bus_id;
+ uint8_t source_bus_irq;
+ uint8_t dest_ioapic_id; // NOTE(review): field names mirror the I/O interrupt entry; for a local entry this is presumably the destination lapic id - confirm against the MP spec
+ uint8_t dest_ioapic_intn; // NOTE(review): presumably the destination lapic LINTIN pin - confirm against the MP spec
+} __attribute__((packed));
+
+
+
+
+
+/*
+ * Test whether the BIOS placeholder cookie is present at target.
+ * Returns 1 if the first BIOS_MP_TABLE_COOKIE_LEN bytes at target equal
+ * BIOS_MP_TABLE_COOKIE, 0 otherwise.
+ */
+static int check_for_cookie(void *target)
+{
+    if (memcmp(target,BIOS_MP_TABLE_COOKIE,BIOS_MP_TABLE_COOKIE_LEN) != 0) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Verify the checksum of the MP configuration table at target.
+ * The table header's base_table_length gives the number of bytes to sum;
+ * the table is valid when those bytes add up to zero modulo 256.
+ * Returns 1 if the checksum is good, 0 otherwise.
+ */
+static int check_table(void *target)
+{
+    const struct mp_table_header *hdr = (struct mp_table_header *)target;
+    const uint8_t *bytes = (uint8_t *)target;
+    uint8_t total = 0;
+    uint32_t idx;
+
+    V3_Print("Checksuming mptable header and entries at %p\n",target);
+
+    for (idx = 0; idx < hdr->base_table_length; idx++) {
+        total += bytes[idx];
+    }
+
+    if (total != 0) {
+        V3_Print("Checksum FAILED\n");
+        return 0;
+    }
+
+    V3_Print("Checksum passed\n");
+    return 1;
+}
+
+
+/*
+ * Verify the checksum of the MP floating pointer structure at target.
+ * The structure's length field counts 16 byte paragraphs; all of those
+ * bytes must add up to zero modulo 256.
+ * Returns 1 if the checksum is good, 0 otherwise.
+ */
+static int check_pointer(void *target)
+{
+    const struct mp_floating_pointer *fp = (struct mp_floating_pointer *)target;
+    const uint8_t *bytes = (uint8_t *)target;
+    uint32_t nbytes;
+    uint32_t idx;
+    uint8_t total = 0;
+
+    V3_Print("Checksuming mptable floating pointer at %p\n",target);
+
+    nbytes = fp->length*16;
+    for (idx = 0; idx < nbytes; idx++) {
+        total += bytes[idx];
+    }
+
+    if (total != 0) {
+        V3_Print("Checksum FAILED\n");
+        return 0;
+    }
+
+    V3_Print("Checksum passed\n");
+    return 1;
+}
+
+
+/*
+ * Write an MP floating pointer structure at target.
+ *
+ * target      - host address to write the 16 byte structure to
+ * mptable_gpa - guest physical address of the MP configuration table
+ *               header that the structure should point to
+ *
+ * The structure is zeroed, filled in, and its checksum byte is set so
+ * that its 16 bytes sum to zero modulo 256.  Always returns 0.
+ */
+static int write_pointer(void *target, uint32_t mptable_gpa)
+{
+    struct mp_floating_pointer *fp = (struct mp_floating_pointer*)target;
+    const uint8_t *bytes = (uint8_t *)target;
+    uint8_t total = 0;
+    uint32_t idx;
+
+    // start from an all-zero structure (this also clears the checksum
+    // and the feature bytes)
+    memset((void*)fp,0,sizeof(*fp));
+
+    memcpy((void*)&(fp->signature),POINTER_SIGNATURE,4);
+    fp->pointer = mptable_gpa;
+    fp->length = 1; // length in 16 byte chunks
+    fp->spec_rev = SPEC_REV;
+    fp->checksum = 0;
+
+    // the checksum byte is the two's complement of the sum of the others
+    for (idx = 0; idx < 16; idx++) {
+        total += bytes[idx];
+    }
+    fp->checksum = (uint8_t)(256 - total);
+
+    V3_Print("MP Floating Pointer written to %p\n",target);
+
+    return 0;
+}
+
+
+
+
+/*
+ * Copy a string constant into a fixed-width MP table text field.
+ * At most dstlen bytes are copied and any remaining bytes are filled with
+ * spaces, so src is never read beyond srclen bytes and the field does not
+ * depend on the constant being exactly as wide as the field.
+ */
+static void write_padded_field(uint8_t *dst, uint32_t dstlen, const char *src, uint32_t srclen)
+{
+    uint32_t n = (srclen < dstlen) ? srclen : dstlen;
+
+    memset((void*)dst,' ',dstlen);
+    memcpy((void*)dst,src,n);
+}
+
+/*
+ * Build the MP configuration table (header followed by its entries) at
+ * target.
+ *
+ * target   - host address to write the table to; the caller must provide
+ *            enough space for the header and all entries
+ * numcores - number of processors to describe
+ *
+ * The entries written are, in order:
+ *   - numcores processor entries with lapic ids 0..numcores-1
+ *     (core 0 is flagged as the bootstrap processor)
+ *   - one ISA bus entry (bus id 0)
+ *   - one ioapic entry with id numcores
+ *   - 16 I/O interrupt entries routing ISA IRQ n to input n of that ioapic
+ *
+ * Finally base_table_length is set and the checksum byte is chosen so
+ * that the whole table sums to zero modulo 256.
+ *
+ * Returns 0 on success, -1 if numcores cannot be expressed in the 8 bit
+ * APIC id fields.
+ */
+static int write_mptable(void *target, uint32_t numcores)
+{
+    uint32_t i;
+    uint32_t core;
+    uint8_t sum;
+    uint8_t irq;
+    uint8_t *cur;
+    struct mp_table_header *header;
+    struct mp_table_processor *proc;
+    struct mp_table_bus *bus;
+    struct mp_table_ioapic *ioapic;
+    struct mp_table_io_interrupt_assignment *interrupt;
+
+    // lapic ids 0..numcores-1 and ioapic id numcores are all stored in
+    // uint8_t fields
+    if (numcores > 255) {
+        PrintError("Cannot write mptable: %u cores do not fit in 8 bit APIC ids\n",numcores);
+        return -1;
+    }
+
+    cur=(uint8_t *)target;
+    header=(struct mp_table_header *)cur;
+    cur=cur+sizeof(*header);
+
+    memset((void*)header,0,sizeof(*header));
+
+    memcpy(&(header->signature),HEADER_SIGNATURE,4);
+    header->spec_rev=SPEC_REV;
+    write_padded_field(header->oem_id,sizeof(header->oem_id),OEM_ID,sizeof(OEM_ID)-1);
+    write_padded_field(header->prod_id,sizeof(header->prod_id),PROD_ID,sizeof(PROD_ID)-1);
+
+    // n processors, 1 ioapic, 1 isa bus, 16 IRQs = 18+n
+    header->entry_count=numcores+18;
+    header->lapic_addr=LAPIC_ADDR;
+
+    // now we arrange the processors;
+
+    for (core=0;core<numcores;core++, cur+=sizeof(*proc)) {
+        proc=(struct mp_table_processor *)cur;
+        memset((void*)proc,0,sizeof(*proc));
+        proc->entry_type=ENTRY_PROC;
+        proc->lapic_id=(uint8_t)core;
+        proc->lapic_version=LAPIC_VERSION;
+        proc->cpu_flags.fields.en=1;
+        proc->cpu_flags.fields.bp = (core==0);
+        proc->cpu_signature.fields.family=PROC_FAMILY;
+        proc->cpu_signature.fields.model=PROC_MODEL;
+        proc->cpu_signature.fields.stepping=PROC_STEPPING;
+        proc->cpu_feature_flags=PROC_FEATURE_FLAGS;
+    }
+
+    // next comes the ISA bus
+    bus=(struct mp_table_bus *)cur;
+    cur+=sizeof(*bus);
+
+    memset((void*)bus,0,sizeof(*bus));
+    bus->entry_type=ENTRY_BUS;
+    bus->bus_id=0;
+    write_padded_field(bus->bus_type,sizeof(bus->bus_type),BUS_ISA,sizeof(BUS_ISA)-1);
+
+    // next comes the IOAPIC
+    ioapic=(struct mp_table_ioapic *)cur;
+    cur+=sizeof(*ioapic);
+
+    memset((void*)ioapic,0,sizeof(*ioapic));
+    ioapic->entry_type=ENTRY_IOAPIC;
+    ioapic->ioapic_id=(uint8_t)numcores;
+    ioapic->ioapic_version=IOAPIC_VERSION;
+    ioapic->ioapic_flags.fields.en=1;
+    ioapic->ioapic_address=IOAPIC_ADDR;
+
+    // finally the 16 ISA IRQs, each routed to the same-numbered ioapic input
+    for (irq=0;irq<16;irq++, cur+=sizeof(*interrupt)) {
+        interrupt=(struct mp_table_io_interrupt_assignment *)cur;
+        memset((void*)interrupt,0,sizeof(*interrupt));
+        interrupt->entry_type=ENTRY_IOINT;
+        interrupt->interrupt_type=INT_TYPE_INT;
+        interrupt->io_interrupt_flags.fields.po=INT_POLARITY_DEFAULT;
+        interrupt->io_interrupt_flags.fields.el=INT_TRIGGER_DEFAULT;
+        interrupt->source_bus_id=0;
+        interrupt->source_bus_irq=irq;
+        // must name the ioapic entry written above, whose id is numcores
+        interrupt->dest_ioapic_id=ioapic->ioapic_id;
+        interrupt->dest_ioapic_intn=irq;
+    }
+
+    // now we can set the length;
+
+    header->base_table_length = (cur-(uint8_t*)header);
+
+    // checksum calculation: the checksum byte is the two's complement of
+    // the sum of all other bytes, so the whole table sums to zero
+    header->checksum=0;
+    sum=0;
+    for (i=0;i<header->base_table_length;i++) {
+        sum+=((uint8_t *)target)[i];
+    }
+    header->checksum=(255-sum)+1;
+
+    return 0;
+}
+
+
+/*
+ * Write an MP floating pointer and MP configuration table into the guest
+ * BIOS area of vm.
+ *
+ * The guest physical address BIOS_MP_TABLE_DEFAULT_LOCATION is translated
+ * to a host address through core 0 and must hold the BIOS placeholder
+ * cookie.  The floating pointer is written there and the configuration
+ * table directly after it; each is re-read and checksum-verified after
+ * being written.
+ *
+ * Returns 0 on success, -1 if the location is unmapped, the cookie is
+ * missing, the VM has more than 32 cores, or a write/verify step fails.
+ */
+int v3_inject_mptable(struct v3_vm_info *vm)
+{
+    void *target;
+    void *table;
+
+    if (v3_gpa_to_hva(&(vm->cores[0]),BIOS_MP_TABLE_DEFAULT_LOCATION,(addr_t*)&target) == -1) {
+        PrintError("Cannot inject mptable due to unmapped bios!\n");
+        return -1;
+    }
+
+    if (check_for_cookie(target) == 0) {
+        PrintError("Cookie mismatch in writing mptable, aborting (probably wrong guest BIOS).\n");
+        return -1;
+    }
+
+    if (vm->num_cores > 32) {
+        PrintError("No support for >32 cores in writing MP table, aborting.\n");
+        return -1;
+    }
+
+    V3_Print("Starting mptable pointer, header, and entry construction for %u cores at %p\n",vm->num_cores,target);
+
+    // the configuration table lives immediately behind the floating pointer
+    table = (void *)((uint8_t *)target + sizeof(struct mp_floating_pointer));
+
+    if (write_pointer(target,BIOS_MP_TABLE_DEFAULT_LOCATION+sizeof(struct mp_floating_pointer)) == -1) {
+        PrintError("Unable to write mptable floating pointer, aborting.\n");
+        return -1;
+    }
+
+    if (check_pointer(target) == 0) {
+        PrintError("Failed to inject mptable floating pointer correctly --- checksum fails\n");
+        return -1;
+    }
+
+    if (write_mptable(table,vm->num_cores) == -1) {
+        PrintError("Cannot inject mptable configuration header and entries\n");
+        return -1;
+    }
+
+    if (check_table(table) == 0) {
+        PrintError("Failed to inject mptable configuration header and entries correctly --- checksum fails\n");
+        return -1;
+    }
+
+    V3_Print("Done with mptable pointer, header, and entry construction\n");
+
+    return 0;
+
+}
V3_Print("Exit information for Core %d\n", core->cpu_id);
+ if (!node) {
+ V3_Print("No information yet for this core\n");
+ continue;
+ }
+
do {
evt = rb_entry(node, struct exit_event, tree_node);
const char * code_str = vmexit_code_to_str(evt->exit_code);