2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2010, Peter Dinda <pdinda@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Peter Dinda <pdinda@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_string.h>
22 #include <palacios/vm_guest_mem.h>
26 The guest bios is compiled with blank space for an MP table
27 at a default address. A cookie value is temporarily placed
28 there so we can verify it exists. If it does, we overwrite
29 the MP table based on the configuration we are given in the
32 Currently, we set up n identical processors (based on
33 number of cores in guest info), with apics 0..n-1, and
34 ioapic as n. The ISA interrupt lines map to pins 0..15
35 of the first ioapic. PCI bus lines map to pins 16..19
36 of the first ioapic. The system supports virtual wire
37 compatibility mode and symmetric mode. PIC mode is not supported.
39 The expectation is that the target will have
40 8 bytes (for ___HVMMP signature) followed by 896 bytes of space
41 for a total of 904 bytes of space.
42 We write the floating pointer at target (16 bytes),
43 immediately followed by the mp config header, followed by
/* Guest-physical address of the landing zone the guest BIOS reserves for the MP table. */
#define BIOS_MP_TABLE_DEFAULT_LOCATION 0xfcc00 // guest physical (linear)
/* Placeholder the BIOS leaves in the landing zone so we can verify it exists. */
#define BIOS_MP_TABLE_COOKIE "___HVMMP"
#define BIOS_MP_TABLE_COOKIE_LEN 8

#define POINTER_SIGNATURE "_MP_"
#define HEADER_SIGNATURE "PCMP"

#define SPEC_REV ((uint8_t)0x4)

/*
 * These identifiers are copied with fixed-length memcpy() calls (8 bytes for
 * the OEM id, 12 for the product id), so each literal must supply at least
 * that many characters; short names are padded with spaces.
 */
#define OEM_ID "V3VEE   "
#define PROD_ID "PALACIOS 1.3 "

#define LAPIC_ADDR 0xfee00000
#define LAPIC_VERSION 0x11

/* Base-table entry type codes (first byte of every entry that follows the header). */
#define ENTRY_PROC 0
#define ENTRY_BUS 1
#define ENTRY_IOAPIC 2
#define ENTRY_IOINT 3

#define IOAPIC_ADDR 0xfec00000
#define IOAPIC_VERSION 0x11

// These are bochs defaults - should really come from cpuid of machine
#define PROC_FAMILY 0x6
#define PROC_STEPPING 0x0
#define PROC_MODEL 0x0
#define PROC_FEATURE_FLAGS 0x00000201

/* Bus type strings are copied as exactly 6 bytes, so they are space padded to 6 characters. */
#define BUS_ISA "ISA   "
#define BUS_PCI "PCI   "

/* Interrupt entry: interrupt type */
#define INT_TYPE_INT 0
#define INT_TYPE_NMI 1
#define INT_TYPE_SMI 2
#define INT_TYPE_EXT 3

/* Interrupt entry: polarity (2-bit "po" flag field) */
#define INT_POLARITY_DEFAULT 0
#define INT_POLARITY_ACTIVE_HIGH 1
#define INT_POLARITY_RESERVED 2
#define INT_POLARITY_ACTIVE_LOW 3

/* Interrupt entry: trigger mode (2-bit "el" flag field) */
#define INT_TRIGGER_DEFAULT 0
#define INT_TRIGGER_EDGE 1
#define INT_TRIGGER_RESERVED 2
#define INT_TRIGGER_LEVEL 3
/*
 * MP floating pointer structure: the 16-byte record that points to the
 * MP configuration table header.  All 16 bytes, including the checksum
 * byte, must sum to zero modulo 256.
 */
struct mp_floating_pointer {
    uint32_t signature;         /* "_MP_" */
    uint32_t pointer;           /* gpa of the MP configuration table header */
    uint8_t  length;            /* length in 16 byte chunks (paragraphs) */
    uint8_t  spec_rev;          /* 0x4 */
    uint8_t  checksum;          /* chosen so that all bytes of this structure sum to zero */
    uint8_t  mp_featurebyte[5]; /* zero out to indicate mp config table
                                   first byte nonzero => default configurations (see spec)
                                   second byte, bit 7 (top bit) = IMCR if set, virtual wire if zero */
} __attribute__((packed));
/*
 * MP configuration table header (44 bytes).  The entries of the base table
 * follow it directly in memory.
 */
struct mp_table_header {
    uint32_t signature;               /* "PCMP" */
    uint16_t base_table_length;       /* bytes, starting from header */
    uint8_t  spec_rev;                /* specification revision (0x4 is the current rev) */
    uint8_t  checksum;                /* sum of all bytes, including checksum, must be zero */
    uint8_t  oem_id[8];               /* OEM ID, space padded, not NUL terminated */
    uint8_t  prod_id[12];             /* Product ID "PALACIOS 1.3", not NUL terminated */
    uint32_t oem_table_ptr;           /* oem table, if used (zeroed) */
    uint16_t oem_table_size;          /* oem table length, if used */
    uint16_t entry_count;             /* number of entries in this table */
    uint32_t lapic_addr;              /* apic address on all processors */
    uint16_t extended_table_length;   /* zero by default */
    uint8_t  extended_table_checksum; /* zero by default */
    uint8_t  reserved;                /* zero by default */
    /* this is followed by entries of the various types indicated below */
} __attribute__((packed));
/*
 * Processor entry (type 0, 20 bytes): one per core.
 *
 * cpu_flags and cpu_signature are unions so that the individual bitfields
 * can be set by name while the raw value stays accessible as `value`.
 */
struct mp_table_processor {
    uint8_t entry_type;      // type 0
    uint8_t lapic_id;        // 0..
    uint8_t lapic_version;   //

    union {
        uint8_t value;
        struct {
            uint8_t en       : 1;   /* 1 = processor enabled */
            uint8_t bp       : 1;   /* 1 = bootstrap processor */
            uint8_t reserved : 6;
        } __attribute__((packed));
    } __attribute__((packed)) cpu_flags;

    union {
        uint32_t value;
        struct {
            /* a single base type keeps all four fields in one 32-bit unit */
            uint32_t stepping : 4;
            uint32_t model    : 4;
            uint32_t family   : 4;
            uint32_t reserved : 20;
        } __attribute__((packed));
    } __attribute__((packed)) cpu_signature;

    uint32_t cpu_feature_flags;   /* result of CPUID */
    uint32_t reserved[2];
} __attribute__((packed));
/* Bus entry (type 1, 8 bytes): names one bus that interrupt entries can refer to by id. */
struct mp_table_bus {
    uint8_t entry_type;    /* type 1 */
    uint8_t bus_id;        /* 0.. */
    uint8_t bus_type[6];   /* "PCI" "INTERN", etc; space padded, not NUL terminated */
} __attribute__((packed));
/* I/O APIC entry (type 2, 8 bytes). */
struct mp_table_ioapic {
    uint8_t entry_type;       /* type 2 */
    uint8_t ioapic_id;        /* 0.. */
    uint8_t ioapic_version;   /* bits 0..7 of the version register */

    union {
        uint8_t value;
        struct {
            uint8_t en       : 1;   /* 1=ioapic enabled */
            uint8_t reserved : 7;
        } __attribute__((packed));
    } __attribute__((packed)) ioapic_flags;

    uint32_t ioapic_address;  /* physical address (same for all procs) */
} __attribute__((packed));
/*
 * I/O interrupt assignment entry (type 3, 8 bytes): routes one interrupt
 * line of a source bus to one input pin of an I/O APIC.
 */
struct mp_table_io_interrupt_assignment {
    uint8_t entry_type;       /* type 3 */
    uint8_t interrupt_type;   /* 0=int, 1=nmi, 2=smi, 3=ExtInt(8259) */

    union {
        uint16_t value;
        struct {
            /* a single base type keeps all three fields in one 16-bit unit */
            uint16_t po       : 2;   /* polarity (00 = default for bus, 01 = active high, 10 = reserved, 11 = active low) */
            uint16_t el       : 2;   /* trigger mode (00 = default for bus, 01 = edge, 10 = reserved, 11 = level) */
            uint16_t reserved : 12;
        } __attribute__((packed));
    } __attribute__((packed)) flags;

    uint8_t source_bus_id;
    uint8_t source_bus_irq;
    uint8_t dest_ioapic_id;
    uint8_t dest_ioapic_intn;
} __attribute__((packed));
/*
 * Local interrupt assignment entry (type 4, 8 bytes).  Same layout as the
 * I/O interrupt entry.  The last two members keep their "ioapic" names, but
 * in a type 4 entry the MP specification defines them as the destination
 * local APIC id and the local APIC LINTIN# pin.
 */
struct mp_table_local_interrupt_assignment {
    uint8_t entry_type;       /* type 4 */
    uint8_t interrupt_type;   /* 0 = int, 1 = nmi, 2 = smi, 3 = ExtInt(8259) */

    union {
        uint16_t value;
        struct {
            /* a single base type keeps all three fields in one 16-bit unit */
            uint16_t po       : 2;   /* polarity (00 = default for bus, 01 = active high, 10 = reserved, 11 = active low) */
            uint16_t el       : 2;   /* trigger mode (00 = default for bus, 01 = edge, 10 = reserved, 11 = level) */
            uint16_t reserved : 12;
        } __attribute__((packed));
    } __attribute__((packed)) flags;

    uint8_t source_bus_id;
    uint8_t source_bus_irq;
    uint8_t dest_ioapic_id;     /* destination local APIC id for a type 4 entry */
    uint8_t dest_ioapic_intn;   /* destination LINTIN# for a type 4 entry */
} __attribute__((packed));
/* Number of PCI slots that get interrupt entries in write_mptable(); each slot contributes 4 entries. */
222 #define NUM_PCI_SLOTS 8
225 static inline int check_for_cookie(void * target) {
226 return (memcmp(target, BIOS_MP_TABLE_COOKIE, BIOS_MP_TABLE_COOKIE_LEN) == 0);
229 static inline int check_table(void * target) {
232 struct mp_table_header * header;
234 header = (struct mp_table_header *)target;
237 for (i = 0; i < header->base_table_length; i++) {
238 sum += ((uint8_t *)target)[i];
250 static inline int check_pointer(void * target) {
253 struct mp_floating_pointer * p;
255 p = (struct mp_floating_pointer *)target;
258 for (i = 0; i < p->length * 16; i++) {
259 sum += ((uint8_t *)target)[i];
272 static int write_pointer(void * target, uint32_t mptable_gpa) {
275 struct mp_floating_pointer * p = (struct mp_floating_pointer *)target;
277 memset((void *)p, 0, sizeof(struct mp_floating_pointer));
279 memcpy((void *)&(p->signature), POINTER_SIGNATURE, 4);
281 p->pointer = mptable_gpa;
282 p->length = 1; // length in 16 byte chunks
283 p->spec_rev = SPEC_REV;
285 // The remaining zeros indicate that an MP config table is present
286 // and that virtual wire mode is implemented (not PIC mode)
287 // Either virtual wire or PIC must be implemented in addition to
288 // symmetric I/O mode
290 // checksum calculation
294 for (i = 0; i < 16; i++) {
295 sum += ((uint8_t *)target)[i];
298 p->checksum = (255 - sum) + 1;
/*
 * write_mptable - build the MP configuration table at `target`.
 *
 * The visible statements lay out, in order: the table header, one processor
 * entry per core, a PCI bus entry (bus 0), an ISA bus entry (bus 1), the
 * IOAPIC entry, the legacy ISA interrupt entries and the PCI interrupt
 * entries.  Finally base_table_length and the checksum are stored in the
 * header.
 *
 *   target      - address at which the header is written
 *   numcores    - number of processor entries; also used as the IOAPIC id
 *   have_ioapic - nonzero if the guest has an IOAPIC
 *   have_pci    - nonzero if the guest has a PCI bus
 *
 * The caller treats a nonzero return value as failure.
 *
 * NOTE(review): this listing has gaps - local declarations, several
 * conditionals, closing braces and the return statement are not visible.
 * The comments below describe only what the visible statements do.
 */
306 static int write_mptable(void * target, uint32_t numcores, int have_ioapic, int have_pci) {
311 struct mp_table_header * header = NULL;
312 struct mp_table_processor * proc = NULL;
313 struct mp_table_bus * bus = NULL;
314 struct mp_table_ioapic * ioapic = NULL;
315 struct mp_table_io_interrupt_assignment * interrupt = NULL;
/* cur is the write cursor; it always points just past the last entry emitted */
316 uint8_t * cur = target;
318 header = (struct mp_table_header *)cur;
319 cur = cur + sizeof(struct mp_table_header);
321 memset((void *)header, 0, sizeof(struct mp_table_header));
324 memcpy(&(header->signature), HEADER_SIGNATURE, 4);
325 header->spec_rev = SPEC_REV;
/* fixed-width copies: the macros must provide at least 8 and 12 characters */
326 memcpy(header->oem_id, OEM_ID, 8);
327 memcpy(header->prod_id, PROD_ID, 12);
329 // numcores entries for apics, one entry for ioapic (if it exists)
330 // one entry for isa bus (if ioapic exists), one entry for pci bus (if exists),
331 // 16 entries for isa irqs (if ioapic exists) + num_slots*num_intr pci irqs
332 // (if ioapic and pci exist)
// NOTE(review): this budgets 16 ISA interrupt entries, but the comment on the
// ISA loop below says bus IRQ 2 is not included.  The body of the irq == 2
// branch is not visible here; if it skips the entry, only 15 are written.
// Confirm that entry_count matches the entries actually emitted.
333 header->entry_count = numcores + !!have_ioapic + !!have_ioapic + !!have_pci +
334 16*(!!have_ioapic) + NUM_PCI_SLOTS * 4 * (!!have_pci) * (!!have_ioapic);
336 header->lapic_addr = LAPIC_ADDR;
338 // now we arrange the processors;
339 for (core = 0; core < numcores; core++) {
340 proc = (struct mp_table_processor *)cur;
341 memset((void *)proc, 0, sizeof(struct mp_table_processor));
342 proc->entry_type = ENTRY_PROC;
/* local APIC ids are simply the core index (0..numcores-1) */
343 proc->lapic_id = core;
344 proc->lapic_version = LAPIC_VERSION;
345 proc->cpu_flags.en = 1;
/* the condition choosing between the next two assignments is not visible here */
348 proc->cpu_flags.bp = 1;
350 proc->cpu_flags.bp = 0;
353 proc->cpu_signature.family = PROC_FAMILY;
354 proc->cpu_signature.model = PROC_MODEL;
355 proc->cpu_signature.stepping = PROC_STEPPING;
356 proc->cpu_feature_flags = PROC_FEATURE_FLAGS;
358 cur += sizeof(struct mp_table_processor);
361 // PCI bus is always zero
363 bus = (struct mp_table_bus *)cur;
364 cur += sizeof(struct mp_table_bus);
366 memset((void *)bus, 0, sizeof(struct mp_table_bus));
367 bus->entry_type = ENTRY_BUS;
/* fixed-width copy: BUS_PCI must provide at least 6 characters */
369 memcpy(bus->bus_type, BUS_PCI, 6);
372 // next comes the ISA bus (bus one)
373 bus = (struct mp_table_bus *)cur;
374 cur += sizeof(struct mp_table_bus);
376 memset((void *)bus, 0, sizeof(struct mp_table_bus));
377 bus->entry_type = ENTRY_BUS;
/* fixed-width copy: BUS_ISA must provide at least 6 characters */
379 memcpy(bus->bus_type, BUS_ISA, 6);
382 // next comes the IOAPIC
384 ioapic = (struct mp_table_ioapic *)cur;
385 cur += sizeof(struct mp_table_ioapic);
387 memset((void *)ioapic, 0, sizeof(struct mp_table_ioapic));
388 ioapic->entry_type = ENTRY_IOAPIC;
/* the IOAPIC takes the first id after the local APICs (which use 0..numcores-1) */
389 ioapic->ioapic_id = numcores;
390 ioapic->ioapic_version = IOAPIC_VERSION;
391 ioapic->ioapic_flags.en = 1;
392 ioapic->ioapic_address = IOAPIC_ADDR;
396 // LEGACY ISA IRQ mappings
397 // The MPTABLE IRQ mappings are kind of odd.
398 // We don't include a bus IRQ 2, and instead remap Bus IRQ 0 to dest irq 2
399 // The idea here is that the timer hooks to 2, while the PIC hooks
400 // to zero in ExtInt mode. This makes it possible to do virtual wire
401 // mode via the ioapic.
403 // Note that the timer connects to pin 2 of the IOAPIC. Sadly,
404 // the timer is unaware of this and just raises irq 0. The ioapic
405 // transforms this to a pin 2 interrupt. If we want the PIC
406 // to be able to channel interrupts via pin 0, we need a separate
409 for (irq = 0; irq < 16; irq++) {
/* destination pin defaults to the source irq; the special cases follow */
410 uint8_t dst_irq = irq;
414 } else if (irq == 2) {
418 interrupt = (struct mp_table_io_interrupt_assignment *)cur;
419 memset((void *)interrupt, 0, sizeof(struct mp_table_io_interrupt_assignment));
421 interrupt->entry_type = ENTRY_IOINT;
422 interrupt->interrupt_type = INT_TYPE_INT;
423 interrupt->flags.po = INT_POLARITY_DEFAULT;
424 interrupt->flags.el = INT_TRIGGER_DEFAULT;
/* source bus 1 is the ISA bus entry written above */
425 interrupt->source_bus_id = 1;
426 interrupt->source_bus_irq = irq;
427 interrupt->dest_ioapic_id = numcores;
428 interrupt->dest_ioapic_intn = dst_irq;
430 cur += sizeof(struct mp_table_io_interrupt_assignment);
434 if (have_pci && have_ioapic) {
435 // Interrupt redirection entries for PCI bus
437 // We need an entry for each slot+pci interrupt
438 // There can be 32 slots, each of which can use 4 interrupts
439 // Thus there are 128 entries
// NOTE: the loop below only covers NUM_PCI_SLOTS slots, so it writes
// NUM_PCI_SLOTS * 4 entries rather than the 128 described above.
441 // In this simple setup, we map
442 // slot i, intr j (both zero based) to pci_irq[(i+j)%4]
445 static uint8_t pci_irq[4] = {16,17,18,19};
447 for (slot=0;slot<NUM_PCI_SLOTS;slot++) {
448 for (intr=0;intr<4;intr++) {
450 uint8_t dst_irq = pci_irq[(slot+intr)%4];
452 interrupt = (struct mp_table_io_interrupt_assignment *)cur;
453 memset((void *)interrupt, 0, sizeof(struct mp_table_io_interrupt_assignment));
455 interrupt->entry_type = ENTRY_IOINT;
456 interrupt->interrupt_type = INT_TYPE_INT;
457 interrupt->flags.po = INT_POLARITY_DEFAULT;
458 interrupt->flags.el = INT_TRIGGER_DEFAULT;
/* source bus 0 is the PCI bus */
459 interrupt->source_bus_id = 0;
460 // Yes, this is how you encode the slot and pin of a PCI device
461 // As we all know, bits are expensive
462 // We can have as many as 32 slots, but to get that large,
463 // we would need to tweak the bios's landing zone for the mptable
464 interrupt->source_bus_irq = (slot<<2) | intr ;
465 interrupt->dest_ioapic_id = numcores;
466 interrupt->dest_ioapic_intn = dst_irq;
468 cur += sizeof(struct mp_table_io_interrupt_assignment);
470 //V3_Print(VM_NONE, VCORE_NONE, "PCI0, slot %d, irq %d maps to irq %d\n",slot,intr,dst_irq);
475 // now we can set the length;
/* table length = distance from the start of the header to the write cursor */
477 header->base_table_length = (cur - (uint8_t *)header);
479 V3_Print(VM_NONE, VCORE_NONE, "MPtable size: %u\n",header->base_table_length);
481 // checksum calculation
/* the checksum byte must be zero while the table bytes are being summed */
482 header->checksum = 0;
484 for (i = 0; i < header->base_table_length; i++) {
485 sum += ((uint8_t *)target)[i];
/* stores the negated byte sum (truncated to 8 bits by the uint8_t field) so the
   table sums to zero; assumes sum starts at 0 - its declaration is not visible */
487 header->checksum = (255 - sum) + 1;
/*
 * find_first_peer_device_of_class - scan the "device" children of the given
 * node's parent, comparing each one's "class" value case-insensitively
 * against `theclass`.  The loop stops at the first match or when there are
 * no more children.
 *
 * NOTE(review): only a fragment of this function is visible here (the loop
 * step, body and return are missing).  Callers compare the result against
 * NULL to learn whether such a device exists.
 * NOTE(review): if v3_cfg_val() can return NULL for a device without a
 * "class" value, strcasecmp() would be handed a NULL pointer - confirm.
 */
493 static v3_cfg_tree_t *find_first_peer_device_of_class(v3_cfg_tree_t *themptablenode, char *theclass)
495 v3_cfg_tree_t *p=themptablenode->parent;
503 for (c=v3_xml_child(p,"device");
504 c && strcasecmp(v3_cfg_val(c,"class"),theclass);
/*
 * mptable_init - init routine registered for the "MPTABLE" device.
 *
 * Visible steps: look for pci / piix3 / lapic / ioapic devices among this
 * node's peers, translate the BIOS landing zone address to a host address,
 * verify the placeholder cookie, then write and checksum-verify the floating
 * pointer followed by the MP configuration table.
 *
 * NOTE(review): the conditionals guarding several of the error messages and
 * all return statements are not visible in this listing.
 */
514 static int mptable_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
515 void * target = NULL;
/* device presence is detected by looking for peer devices of each class */
517 int have_pci = find_first_peer_device_of_class(cfg,"pci")!=NULL;
518 int have_piix3 = find_first_peer_device_of_class(cfg,"piix3")!=NULL;
519 int have_apic = find_first_peer_device_of_class(cfg,"lapic")!=NULL;
520 int have_ioapic = find_first_peer_device_of_class(cfg,"ioapic")!=NULL;
522 uint32_t num_cores = vm->num_cores;
/* NOTE(review): the condition under which this override applies is not visible here */
525 num_cores = v3_get_hvm_ros_cores(vm);
529 PrintError(vm, VCORE_NONE, "Attempt to instantiate MPTABLE but machine has no apics!\n");
534 PrintError(vm, VCORE_NONE, "Attempt to instantiate MPTABLE without ioapic - will try, but this won't end well\n");
537 if (have_pci && (!have_piix3 || !have_ioapic)) {
538 PrintError(vm, VCORE_NONE, "Attempt to instantiate MPTABLE with a PCI Bus, but without either a piix3 or an ioapic\n");
/* translate the guest-physical landing zone address into a pointer we can write through */
542 if (v3_gpa_to_hva(&(vm->cores[0]), BIOS_MP_TABLE_DEFAULT_LOCATION, (addr_t *)&target) == -1) {
543 PrintError(vm, VCORE_NONE, "Cannot inject mptable due to unmapped bios!\n");
547 if (!check_for_cookie(target)) {
548 PrintError(vm, VCORE_NONE, "Cookie mismatch in writing mptable, aborting (probably just wrong guest BIOS, so this is not a hard error).\n");
549 // we pretend we were successful
// NOTE(review): the file header describes a 904-byte landing zone.  With a
// 16-byte pointer, 44-byte header, 20 bytes per processor entry and 8 bytes
// per bus/ioapic/interrupt entry, 32 cores plus the ISA and PCI interrupt
// entries appear to need more than 904 bytes - confirm this limit is safe.
553 if (num_cores > 32) {
554 PrintError(vm, VCORE_NONE, "No support for >32 cores in writing MP table, aborting.\n");
558 V3_Print(vm, VCORE_NONE, "Constructing mptable for %u cores at %p\n", num_cores, target);
/* the config table sits directly after the floating pointer, so its gpa is
   the landing zone address plus the size of the pointer structure */
560 if (write_pointer(target, BIOS_MP_TABLE_DEFAULT_LOCATION + sizeof(struct mp_floating_pointer)) == -1) {
561 PrintError(vm, VCORE_NONE, "Unable to write mptable floating pointer, aborting.\n");
/* read back what was just written and verify its checksum */
565 if (!check_pointer(target)) {
566 PrintError(vm, VCORE_NONE, "Failed to inject mptable floating pointer correctly --- checksum fails\n");
/* note: arithmetic on a void pointer (byte granularity) is a compiler extension */
570 if (write_mptable(target + sizeof(struct mp_floating_pointer), num_cores, have_ioapic, have_pci)) {
571 PrintError(vm, VCORE_NONE, "Cannot inject mptable configuration header and entries\n");
575 if (!check_table(target + sizeof(struct mp_floating_pointer))) {
576 PrintError(vm, VCORE_NONE, "Failed to inject mptable configuration header and entries correctly --- checksum fails\n");
/* Registers mptable_init as the init routine for devices named "MPTABLE". */
586 device_register("MPTABLE", mptable_init)