/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2009, Lei Xia <lxia@northwestern.edu>
 * Copyright (c) 2009, Chang Seok Bae <jhuell@gmail.com>
 * Copyright (c) 2009, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Lei Xia <lxia@northwestern.edu>
 *         Chang Seok Bae <jhuell@gmail.com>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
26 #include <palacios/vmm.h>
27 #include <palacios/vmm_types.h>
28 #include <palacios/vmm_io.h>
29 #include <palacios/vmm_intr.h>
30 #include <palacios/vmm_rbtree.h>
32 #include <devices/pci.h>
33 #include <devices/pci_types.h>
// Defines PrintDebug() with an empty expansion, so PrintDebug() calls in this
// file produce no code.
37 #define PrintDebug(fmt, args...)
// Base I/O port of the PCI configuration address register; pci_init hooks
// the four consecutive ports starting here.
41 #define CONFIG_ADDR_PORT 0x0cf8
// Base I/O port of the PCI configuration data register; pci_init hooks
// the four consecutive ports starting here.
42 #define CONFIG_DATA_PORT 0x0cfc
// Number of entries in pci_internal.bus_list
45 #define PCI_BUS_COUNT 1
47 // This must always be a multiple of 8
// (the dev_map bitmap is sized as MAX_BUS_DEVICES / 8 bytes)
48 #define MAX_BUS_DEVICES 32
61 } __attribute__((packed));
62 } __attribute__((packed));
63 } __attribute__((packed));
// Nodes are compared on each device's devfn value (see get_device)
72 // Red Black tree containing all attached devices
73 struct rb_root devices;
// One bit per device number; a set bit marks the number as taken
// (see get_free_dev_num / allocate_dev_num)
75 // Bitmap of the allocated device numbers
76 uint8_t dev_map[MAX_BUS_DEVICES / 8];
// Interrupt callbacks and the device handed to them as first argument;
// all three are installed together by v3_pci_set_irq_bridge()
78 int (*raise_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev);
79 int (*lower_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev);
80 struct vm_device * irq_bridge_dev;
// Accessed by the guest through the CONFIG_ADDR_PORT read/write handlers
86 // Configuration address register
87 struct pci_addr_reg addr_reg;
// The configuration data port handlers only ever consult bus_list[0]
90 struct pci_bus bus_list[PCI_BUS_COUNT];
99 static void pci_dump_state(struct pci_internal * pci_state) {
100 struct rb_node * node = v3_rb_first(&(pci_state->bus_list[0].devices));
101 struct pci_device * tmp_dev = NULL;
103 PrintDebug("===PCI: Dumping state Begin ==========\n");
106 tmp_dev = rb_entry(node, struct pci_device, dev_tree_node);
108 PrintDebug("PCI Device Number: %d (%s):\n", tmp_dev->dev_num, tmp_dev->name);
109 PrintDebug("irq = %d\n", tmp_dev->config_header.intr_line);
110 PrintDebug("Vend ID: 0x%x\n", tmp_dev->config_header.vendor_id);
111 PrintDebug("Device ID: 0x%x\n", tmp_dev->config_header.device_id);
113 } while ((node = v3_rb_next(node)));
115 PrintDebug("====PCI: Dumping state End==========\n");
123 // Scan the dev_map bitmap for the first '0' bit
124 static int get_free_dev_num(struct pci_bus * bus) {
127 for (i = 0; i < sizeof(bus->dev_map); i++) {
128 PrintDebug("i=%d\n", i);
129 if (bus->dev_map[i] != 0xff) {
131 for (j = 0; j < 8; j++) {
132 PrintDebug("\tj=%d\n", j);
133 if (!(bus->dev_map[i] & (0x1 << j))) {
134 return ((i * 8) + j);
143 static void allocate_dev_num(struct pci_bus * bus, int dev_num) {
144 int major = (dev_num / 8);
145 int minor = dev_num % 8;
147 bus->dev_map[major] |= (0x1 << minor);
// Finds the place for dev in the bus's device tree, comparing devices by
// their devfn value, and links dev's tree node there with rb_link_node().
// The follow-up v3_rb_insert_color() call is made by the caller,
// add_device_to_bus().
// NOTE(review): the descent statements and the return lines are not visible
// in this excerpt.  add_device_to_bus() treats a non-NULL result as "not
// inserted" and passes it back to its own caller — confirm against the full
// source.
153 struct pci_device * __add_device_to_bus(struct pci_bus * bus, struct pci_device * dev) {
// p addresses the link currently being examined, starting at the tree root
155 struct rb_node ** p = &(bus->devices.rb_node);
156 struct rb_node * parent = NULL;
157 struct pci_device * tmp_dev = NULL;
// Recover the pci_device that embeds the tree node under examination
161 tmp_dev = rb_entry(parent, struct pci_device, dev_tree_node);
163 if (dev->devfn < tmp_dev->devfn) {
165 } else if (dev->devfn > tmp_dev->devfn) {
172 rb_link_node(&(dev->dev_tree_node), parent, p);
179 struct pci_device * add_device_to_bus(struct pci_bus * bus, struct pci_device * dev) {
181 struct pci_device * ret = NULL;
183 if ((ret = __add_device_to_bus(bus, dev))) {
187 v3_rb_insert_color(&(dev->dev_tree_node), &(bus->devices));
189 allocate_dev_num(bus, dev->dev_num);
// Looks up the device attached to bus under device number dev_num and
// function number fn_num.  Both are packed into one 8-bit key (dev_num in
// bits 3-7, fn_num in bits 0-2) that is compared against the devfn of each
// visited node, starting from the tree root.
// NOTE(review): the descent statements and return lines are not visible in
// this excerpt; every caller in this file tests the result against NULL to
// detect "no device at this address".
195 static struct pci_device * get_device(struct pci_bus * bus, uint8_t dev_num, uint8_t fn_num) {
196 struct rb_node * n = bus->devices.rb_node;
197 struct pci_device * dev = NULL;
// Out-of-range inputs are truncated to 5 bits (device) and 3 bits (function)
198 uint8_t devfn = ((dev_num & 0x1f) << 3) | (fn_num & 0x7);
// Recover the pci_device that embeds the current tree node
201 dev = rb_entry(n, struct pci_device, dev_tree_node);
203 if (devfn < dev->devfn) {
205 } else if (devfn > dev->devfn) {
221 static int addr_port_read(ushort_t port, void * dst, uint_t length, struct vm_device * dev) {
222 struct pci_internal * pci_state = (struct pci_internal *)dev->private_data;
223 int reg_offset = port & 0x3;
224 uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset;
226 PrintDebug("Reading PCI Address Port (%x): %x len=%d\n", port, pci_state->addr_reg.val, length);
229 if (reg_offset != 0) {
230 PrintError("Invalid Address Port Read\n");
233 *(uint32_t *)dst = *(uint32_t *)reg_addr;
234 } else if (length == 2) {
235 if (reg_offset > 2) {
236 PrintError("Invalid Address Port Read\n");
239 *(uint16_t *)dst = *(uint16_t *)reg_addr;
240 } else if (length == 1) {
241 *(uint8_t *)dst = *(uint8_t *)reg_addr;
243 PrintError("Invalid read length (%d) for PCI address register\n", length);
252 static int addr_port_write(ushort_t port, void * src, uint_t length, struct vm_device * dev) {
253 struct pci_internal * pci_state = (struct pci_internal *)dev->private_data;
254 int reg_offset = port & 0x3;
255 uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset;
259 if (reg_offset != 0) {
260 PrintError("Invalid Address Port Write\n");
264 PrintDebug("Writing PCI 4 bytes Val=%x\n", *(uint32_t *)src);
266 *(uint32_t *)reg_addr = *(uint32_t *)src;
267 } else if (length == 2) {
268 if (reg_offset > 2) {
269 PrintError("Invalid Address Port Write\n");
273 PrintDebug("Writing PCI 2 byte Val=%x\n", *(uint16_t *)src);
275 *(uint16_t *)reg_addr = *(uint16_t *)src;
276 } else if (length == 1) {
277 PrintDebug("Writing PCI 1 byte Val=%x\n", *(uint8_t *)src);
278 *(uint8_t *)reg_addr = *(uint8_t *)src;
280 PrintError("Invalid write length (%d) for PCI address register\n", length);
284 PrintDebug("Writing PCI Address Port(%x): %x\n", port, pci_state->addr_reg.val);
290 static int data_port_read(ushort_t port, void * dst, uint_t length, struct vm_device * vmdev) {
291 struct pci_internal * pci_state = (struct pci_internal *)(vmdev->private_data);
292 struct pci_device * pci_dev = NULL;
293 uint_t reg_num = (pci_state->addr_reg.reg_num << 2) + (port & 0x3);
296 if (pci_state->addr_reg.bus_num != 0) {
298 for (i = 0; i < length; i++) {
299 *((uint8_t *)dst + i) = 0xff;
305 PrintDebug("Reading PCI Data register. bus = %d, dev = %d, reg = %d (%x), cfg_reg = %x\n",
306 pci_state->addr_reg.bus_num,
307 pci_state->addr_reg.dev_num,
309 pci_state->addr_reg.val);
311 pci_dev = get_device(&(pci_state->bus_list[0]), pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num);
313 if (pci_dev == NULL) {
314 for (i = 0; i < length; i++) {
315 *(uint8_t *)((uint8_t *)dst + i) = 0xff;
321 for (i = 0; i < length; i++) {
322 *(uint8_t *)((uint8_t *)dst + i) = pci_dev->config_space[reg_num + i];
325 PrintDebug("\tVal=%x, len=%d\n", *(uint32_t *)dst, length);
// Decides whether byte reg_num of a device's configuration space may be
// written by the guest, based on the device's header type.
// Header types 0x00 and 0x80 are handled; any other header type is reported
// as an error.
// NOTE(review): the per-register decisions and the return statements are not
// visible in this excerpt.  The only caller, data_port_write(), treats -1 as
// a fatal error and otherwise uses the result as a boolean — confirm the
// register lists against the full source.
331 static inline int is_cfg_reg_writable(uchar_t header_type, int reg_num) {
332 if (header_type == 0x00) {
350 } else if (header_type == 0x80) {
// Header types that are recognised but not supported here:
369 // PCI to PCI Bridge = 0x01
370 // CardBus Bridge = 0x02
373 PrintError("Invalid PCI Header type (0x%.2x)\n", header_type);
// Moves the I/O or memory hooks that back BAR number bar_num of device pci
// after the guest has programmed the register with new_val.
//   I/O BAR:   each of the bar's num_ports ports is unhooked at the old base
//              and hooked again at the new base with the bar's io_read /
//              io_write handlers.
//   MEM32 BAR: the old region is unhooked and a full memory hook spanning
//              num_pages 4KB pages is installed at the new base.
// Other BAR types are only logged or rejected with an error message.
// The single caller, data_port_write(), treats a result of -1 as failure.
// NOTE(review): the switch/case scaffolding, return statements and any
// update of bar->val are short lines that are not visible in this excerpt —
// confirm the exact control flow against the full source.
380 static int bar_update(struct pci_device * pci, int bar_num, uint32_t new_val) {
381 struct v3_pci_bar * bar = &(pci->bar[bar_num]);
383 PrintDebug("Updating BAR Register (Dev=%s) (bar=%d) (old_val=%x) (new_val=%x)\n",
384 pci->name, bar_num, bar->val, new_val);
390 PrintDebug("\tRehooking %d IO ports from base %x to %x\n",
391 bar->num_ports, PCI_IO_BASE(bar->val), PCI_IO_BASE(new_val));
393 // only do this if pci device is enabled....
394 for (i = 0; i < bar->num_ports; i++) {
// NOTE(review): the results of the unhook/hook calls are not checked here,
// although init_bars() does check v3_dev_hook_io() for -1.
396 v3_dev_unhook_io(pci->vm_dev, PCI_IO_BASE(bar->val) + i);
398 v3_dev_hook_io(pci->vm_dev, PCI_IO_BASE(new_val) + i,
399 bar->io_read, bar->io_write);
406 case PCI_BAR_MEM32: {
// NOTE(review): the old region is unhooked at the raw bar->val, while hooks
// are installed at PCI_MEM32_BASE(...) of the value.  init_bars() keeps the
// low four bits in the BAR mask (mask |= 0xf), so bar->val may carry flag
// bits and then would not match the hooked base — presumably this should be
// PCI_MEM32_BASE(bar->val); verify.
407 v3_unhook_mem(pci->vm_dev->vm, (addr_t)(bar->val));
410 v3_hook_full_mem(pci->vm_dev->vm, PCI_MEM32_BASE(new_val),
411 PCI_MEM32_BASE(new_val) + (bar->num_pages * PAGE_SIZE_4KB),
412 bar->mem_read, bar->mem_write, pci->vm_dev);
// NOTE(review): this message follows the unhook above; if it is reached the
// old region has already been removed — confirm that is intended.
414 PrintError("Write hooks not supported for PCI\n");
423 PrintDebug("Reprogramming an unsupported BAR register (Dev=%s) (bar=%d) (val=%x)\n",
424 pci->name, bar_num, new_val);
428 PrintError("Invalid Bar Reg updated (bar=%d)\n", bar_num);
436 static int data_port_write(ushort_t port, void * src, uint_t length, struct vm_device * vmdev) {
437 struct pci_internal * pci_state = (struct pci_internal *)vmdev->private_data;
438 struct pci_device * pci_dev = NULL;
439 uint_t reg_num = (pci_state->addr_reg.reg_num << 2) + (port & 0x3);
443 if (pci_state->addr_reg.bus_num != 0) {
447 PrintDebug("Writing PCI Data register. bus = %d, dev = %d, fn = %d, reg = %d (%x) addr_reg = %x (val=%x, len=%d)\n",
448 pci_state->addr_reg.bus_num,
449 pci_state->addr_reg.dev_num,
450 pci_state->addr_reg.fn_num,
452 pci_state->addr_reg.val,
453 *(uint32_t *)src, length);
456 pci_dev = get_device(&(pci_state->bus_list[0]), pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num);
458 if (pci_dev == NULL) {
459 PrintError("Writing configuration space for non-present device (dev_num=%d)\n",
460 pci_state->addr_reg.dev_num);
465 for (i = 0; i < length; i++) {
466 uint_t cur_reg = reg_num + i;
467 int writable = is_cfg_reg_writable(pci_dev->config_header.header_type, cur_reg);
469 if (writable == -1) {
470 PrintError("Invalid PCI configuration space\n");
475 pci_dev->config_space[cur_reg] = *(uint8_t *)((uint8_t *)src + i);
477 if ((cur_reg >= 0x10) && (cur_reg < 0x28)) {
478 // BAR Register Update
479 int bar_reg = ((cur_reg & ~0x3) - 0x10) / 4;
481 pci_dev->bar_update_flag = 1;
482 pci_dev->bar[bar_reg].updated = 1;
484 // PrintDebug("Updating BAR register %d\n", bar_reg);
486 } else if ((cur_reg >= 0x30) && (cur_reg < 0x34)) {
487 // Extension ROM update
489 pci_dev->ext_rom_update_flag = 1;
490 } else if (cur_reg == 0x04) {
492 uint8_t command = *((uint8_t *)src + i);
494 PrintError("command update for %s old=%x new=%x\n",
496 pci_dev->config_space[cur_reg],command);
498 pci_dev->config_space[cur_reg] = command;
500 if (pci_dev->cmd_update) {
501 pci_dev->cmd_update(pci_dev, (command & 0x01), (command & 0x02));
504 } else if (cur_reg == 0x0f) {
506 pci_dev->config_header.BIST = 0x00;
509 PrintError("PCI Write to read only register %d\n", cur_reg);
513 if (pci_dev->config_update) {
514 pci_dev->config_update(pci_dev, reg_num, length);
517 // Scan for BAR updated
518 if (pci_dev->bar_update_flag) {
519 for (i = 0; i < 6; i++) {
520 if (pci_dev->bar[i].updated) {
521 int bar_offset = 0x10 + 4 * i;
523 *(uint32_t *)(pci_dev->config_space + bar_offset) &= pci_dev->bar[i].mask;
524 // check special flags....
527 if (bar_update(pci_dev, i, *(uint32_t *)(pci_dev->config_space + bar_offset)) == -1) {
528 PrintError("PCI Device %s: Bar update Error Bar=%d\n", pci_dev->name, i);
532 pci_dev->bar[i].updated = 0;
535 pci_dev->bar_update_flag = 0;
538 if ((pci_dev->ext_rom_update_flag) && (pci_dev->ext_rom_update)) {
539 pci_dev->ext_rom_update(pci_dev);
540 pci_dev->ext_rom_update_flag = 0;
// Reset callback of the PCI bus device (see dev_ops): emits a debug message,
// leaves all state untouched and reports success.
static int pci_reset_device(struct vm_device * dev) {
    const int status = 0;

    PrintDebug("pci: reset device\n");

    return status;
}
// Start callback of the PCI bus device (see dev_ops): emits a debug message,
// leaves all state untouched and reports success.
static int pci_start_device(struct vm_device * dev) {
    const int status = 0;

    PrintDebug("pci: start device\n");

    return status;
}
// Stop callback of the PCI bus device (see dev_ops): emits a debug message,
// leaves all state untouched and reports success.
static int pci_stop_device(struct vm_device * dev) {
    const int status = 0;

    PrintDebug("pci: stop device\n");

    return status;
}
568 static int pci_free(struct vm_device * dev) {
571 for (i = 0; i < 4; i++){
572 v3_dev_unhook_io(dev, CONFIG_ADDR_PORT + i);
573 v3_dev_unhook_io(dev, CONFIG_DATA_PORT + i);
581 static void init_pci_busses(struct pci_internal * pci_state) {
584 for (i = 0; i < PCI_BUS_COUNT; i++) {
585 pci_state->bus_list[i].bus_num = i;
586 pci_state->bus_list[i].devices.rb_node = NULL;
587 memset(pci_state->bus_list[i].dev_map, 0, sizeof(pci_state->bus_list[i].dev_map));
// Callback table handed to v3_allocate_device() in pci_init() for the PCI
// bus device.
// NOTE(review): one short initializer line is not visible in this excerpt
// (pci_free is defined above but not referenced in the visible entries) —
// confirm against the full source.
594 static struct v3_device_ops dev_ops = {
596 .reset = pci_reset_device,
597 .start = pci_start_device,
598 .stop = pci_stop_device,
604 static int pci_init(struct guest_info * vm, void * cfg_data) {
605 struct pci_internal * pci_state = V3_Malloc(sizeof(struct pci_internal));
608 PrintDebug("PCI internal at %p\n",(void *)pci_state);
610 struct vm_device * dev = v3_allocate_device("PCI", &dev_ops, pci_state);
612 if (v3_attach_device(vm, dev) == -1) {
613 PrintError("Could not attach device %s\n", "PCI");
618 pci_state->addr_reg.val = 0;
620 init_pci_busses(pci_state);
622 PrintDebug("Sizeof config header=%d\n", (int)sizeof(struct pci_config_header));
624 for (i = 0; i < 4; i++) {
625 v3_dev_hook_io(dev, CONFIG_ADDR_PORT + i, &addr_port_read, &addr_port_write);
626 v3_dev_hook_io(dev, CONFIG_DATA_PORT + i, &data_port_read, &data_port_write);
// Associates the device name "PCI" with pci_init through the device_register
// macro.  NOTE(review): the macro is defined outside this file — presumably
// it adds the pair to the VMM's device table; confirm.
633 device_register("PCI", pci_init)
// Sets up all six BARs of pci_dev: computes each BAR's write mask and
// initial value, stores the value in configuration space at offset
// 0x10 + 4 * index, and installs the default I/O or memory hooks.
// The caller, v3_pci_register_device(), treats a result of -1 as failure.
// NOTE(review): short lines (returns, braces, some else branches) are not
// visible in this excerpt; the notes below mark where that matters.
636 static inline int init_bars(struct pci_device * pci_dev) {
639 for (i = 0; i < 6; i++) {
640 int bar_offset = 0x10 + (4 * i);
642 if (pci_dev->bar[i].type == PCI_BAR_IO) {
// Writable bits are those at or above the port-count granularity, plus
// bit 0.  Assumes num_ports is a power of two — TODO confirm with callers.
644 pci_dev->bar[i].mask = (~((pci_dev->bar[i].num_ports) - 1)) | 0x01;
// Initial value: the default base port with bit 0 set (bit 0 is the I/O
// space indicator in the PCI BAR layout).
646 pci_dev->bar[i].val = pci_dev->bar[i].default_base_port & pci_dev->bar[i].mask;
647 pci_dev->bar[i].val |= 0x00000001;
649 for (j = 0; j < pci_dev->bar[i].num_ports; j++) {
// A default base port of 0xffff means no ports are hooked at this point
651 if (pci_dev->bar[i].default_base_port != 0xffff) {
652 if (v3_dev_hook_io(pci_dev->vm_dev, pci_dev->bar[i].default_base_port + j,
653 pci_dev->bar[i].io_read, pci_dev->bar[i].io_write) == -1) {
654 PrintError("Could not hook default io port %x\n", pci_dev->bar[i].default_base_port + j);
660 *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val;
662 } else if (pci_dev->bar[i].type == PCI_BAR_MEM32) {
// Writable bits are those at or above the region size (num_pages << 12).
// Assumes num_pages is a power of two — TODO confirm with callers.
663 pci_dev->bar[i].mask = ~((pci_dev->bar[i].num_pages << 12) - 1);
664 pci_dev->bar[i].mask |= 0xf; // preserve the configuration flags
666 pci_dev->bar[i].val = pci_dev->bar[i].default_base_addr & pci_dev->bar[i].mask;
// With a read handler present, reads and writes to the region are hooked
669 if (pci_dev->bar[i].mem_read) {
// NOTE(review): the result of v3_hook_full_mem() is not checked
671 v3_hook_full_mem(pci_dev->vm_dev->vm, pci_dev->bar[i].default_base_addr,
672 pci_dev->bar[i].default_base_addr + (pci_dev->bar[i].num_pages * PAGE_SIZE_4KB),
673 pci_dev->bar[i].mem_read, pci_dev->bar[i].mem_write, pci_dev->vm_dev);
674 } else if (pci_dev->bar[i].mem_write) {
// NOTE(review): the message says write-only hooks are unsupported, yet a
// v3_hook_write_mem() call follows in this excerpt.  Lines are missing
// between them, so the call may be unreachable or commented out — confirm.
676 PrintError("Write hooks not supported for PCI devices\n");
679 v3_hook_write_mem(pci_dev->vm_dev->vm, pci_dev->bar[i].default_base_addr,
680 pci_dev->bar[i].default_base_addr + (pci_dev->bar[i].num_pages * PAGE_SIZE_4KB),
681 pci_dev->bar[i].mem_write, pci_dev->vm_dev);
// NOTE(review): which branch the following flag update belongs to cannot be
// determined from the visible lines.
684 // set the prefetchable flag...
685 pci_dev->bar[i].val |= 0x00000008;
689 *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val;
691 } else if (pci_dev->bar[i].type == PCI_BAR_MEM16) {
692 PrintError("16 Bit memory ranges not supported (reg: %d)\n", i);
694 } else if (pci_dev->bar[i].type == PCI_BAR_NONE) {
// Unused BAR: reads back as 0; data_port_write() ANDs guest writes with the
// mask, so a zero mask discards them
695 pci_dev->bar[i].val = 0x00000000;
696 pci_dev->bar[i].mask = 0x00000000; // This ensures that all updates will be dropped
697 *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val;
699 PrintError("Invalid BAR type for bar #%d\n", i);
708 int v3_pci_set_irq_bridge(struct vm_device * pci_bus, int bus_num,
709 int (*raise_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev),
710 int (*lower_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev),
711 struct vm_device * bridge_dev) {
712 struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data;
715 pci_state->bus_list[bus_num].raise_pci_irq = raise_pci_irq;
716 pci_state->bus_list[bus_num].lower_pci_irq = lower_pci_irq;
717 pci_state->bus_list[bus_num].irq_bridge_dev = bridge_dev;
722 int v3_pci_raise_irq(struct vm_device * pci_bus, int bus_num, struct pci_device * dev) {
723 struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data;
724 struct pci_bus * bus = &(pci_state->bus_list[bus_num]);
726 return bus->raise_pci_irq(bus->irq_bridge_dev, dev);
729 int v3_pci_lower_irq(struct vm_device * pci_bus, int bus_num, struct pci_device * dev) {
730 struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data;
731 struct pci_bus * bus = &(pci_state->bus_list[bus_num]);
733 return bus->lower_pci_irq(bus->irq_bridge_dev, dev);
// Creates a pci_device, fills it from the arguments and attaches it to a bus
// of the PCI bus device 'pci'.
//   - picks a free device number when dev_num is PCI_AUTO_DEV_NUM
//   - refuses a dev_num/fn_num pair that is already present on the bus
//   - sets the header type from dev_type (0x80 for PCI_MULTIFUNCTION)
//   - stores the three update callbacks and the owning vm_device
//   - copies the six BAR descriptions from 'bars' and runs init_bars()
//   - inserts the device into the bus's tree
// NOTE(review): four parameter lines (the body uses bus_num, dev_num, fn_num
// and name), one case label, the failure returns and the end of the function
// are not visible in this excerpt — confirm against the full source.
736 // if dev_num == -1, auto assign
737 struct pci_device * v3_pci_register_device(struct vm_device * pci,
738 pci_device_type_t dev_type,
743 struct v3_pci_bar * bars,
744 int (*config_update)(struct pci_device * pci_dev, uint_t reg_num, int length),
745 int (*cmd_update)(struct pci_device *pci_dev, uchar_t io_enabled, uchar_t mem_enabled),
746 int (*ext_rom_update)(struct pci_device * pci_dev),
747 struct vm_device * dev) {
749 struct pci_internal * pci_state = (struct pci_internal *)pci->private_data;
// NOTE(review): bus_num is used as an array index without a range check
// against PCI_BUS_COUNT.
750 struct pci_bus * bus = &(pci_state->bus_list[bus_num]);
751 struct pci_device * pci_dev = NULL;
// NOTE(review): dev_num == MAX_BUS_DEVICES passes this test, but dev_map has
// only MAX_BUS_DEVICES bits, so allocate_dev_num() would then touch a byte
// past the bitmap; ">=" looks intended.  Negative values other than the
// auto-assign marker also pass.
754 if (dev_num > MAX_BUS_DEVICES) {
755 PrintError("Requested Invalid device number (%d)\n", dev_num);
759 if (dev_num == PCI_AUTO_DEV_NUM) {
760 PrintDebug("Searching for free device number\n");
// get_free_dev_num() yields -1 when every device number is taken
761 if ((dev_num = get_free_dev_num(bus)) == -1) {
762 PrintError("No more available PCI slots on bus %d\n", bus->bus_num);
767 PrintDebug("Checking for PCI Device\n");
769 if (get_device(bus, dev_num, fn_num) != NULL) {
770 PrintError("PCI Device already registered at slot %d on bus %d\n",
771 dev_num, bus->bus_num);
776 pci_dev = (struct pci_device *)V3_Malloc(sizeof(struct pci_device));
778 if (pci_dev == NULL) {
779 PrintError("Could not allocate pci device\n");
// Start from an all-zero device so fields not set below are 0 / NULL
783 memset(pci_dev, 0, sizeof(struct pci_device));
788 pci_dev->config_header.header_type = 0x00;
790 case PCI_MULTIFUNCTION:
791 pci_dev->config_header.header_type = 0x80;
// NOTE(review): pci_dev has been allocated by this point; no release of it
// is visible on this error path — check for a leak.
794 PrintError("Unhandled PCI Device Type: %d\n", dev_type);
798 pci_dev->bus_num = bus_num;
799 pci_dev->dev_num = dev_num;
800 pci_dev->fn_num = fn_num;
// NOTE(review): strncpy() with the full buffer size leaves the name without
// a terminating NUL when 'name' is at least sizeof(pci_dev->name) long;
// copying sizeof - 1 bytes would keep the zero written by the memset above.
802 strncpy(pci_dev->name, name, sizeof(pci_dev->name));
803 pci_dev->vm_dev = dev;
805 // register update callbacks
806 pci_dev->config_update = config_update;
807 pci_dev->cmd_update = cmd_update;
808 pci_dev->ext_rom_update = ext_rom_update;
// Copy only the fields that are meaningful for each BAR type; 'bars' must
// point at six entries
812 for (i = 0; i < 6; i ++) {
813 pci_dev->bar[i].type = bars[i].type;
815 if (pci_dev->bar[i].type == PCI_BAR_IO) {
816 pci_dev->bar[i].num_ports = bars[i].num_ports;
817 pci_dev->bar[i].default_base_port = bars[i].default_base_port;
818 pci_dev->bar[i].io_read = bars[i].io_read;
819 pci_dev->bar[i].io_write = bars[i].io_write;
820 } else if (pci_dev->bar[i].type == PCI_BAR_MEM32) {
821 pci_dev->bar[i].num_pages = bars[i].num_pages;
822 pci_dev->bar[i].default_base_addr = bars[i].default_base_addr;
823 pci_dev->bar[i].mem_read = bars[i].mem_read;
824 pci_dev->bar[i].mem_write = bars[i].mem_write;
826 pci_dev->bar[i].num_pages = 0;
827 pci_dev->bar[i].default_base_addr = 0;
828 pci_dev->bar[i].mem_read = NULL;
829 pci_dev->bar[i].mem_write = NULL;
// NOTE(review): as above, no release of pci_dev is visible on this error
// path; hooks already installed by init_bars() would also remain.
833 if (init_bars(pci_dev) == -1) {
834 PrintError("could not initialize bar registers\n");
// NOTE(review): the result of add_device_to_bus() is ignored here
839 add_device_to_bus(bus, pci_dev);
842 pci_dump_state(pci_state);