2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2011, Peter Dinda <pdinda@northwestern.edu>
11 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
13 * All rights reserved.
16 * Peter Dinda <pdinda@northwestern.edu> (PCI front device forwarding to host dev interface)
17 * Jack Lange <jarusl@cs.northwestern.edu> (original PCI passthrough to physical hardware)
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
25 This is a front-end PCI device intended to be used together with the
26 host device interface and a *virtual* PCI device implementation in
27 the host OS. It makes it possible to project such a virtual device
28 into the guest as a PCI device. It's based on the PCI passthrough
29 device, which projects *physical* PCI devices into the guest.
31 If you need to project a non-PCI host-based virtual or physical
32 device into the guest, you should use the generic device.
37 * The basic idea is that we do not change the hardware PCI configuration.
38 * Instead, we modify the guest environment to map onto the physical configuration.
40 * The pci subsystem handles most of the configuration space, except for the bar registers.
41 * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks
43 * Memory Bars are always remapped via the shadow map,
44 * IO Bars are selectively remapped through hooks if the guest changes them
47 #include <palacios/vmm.h>
48 #include <palacios/vmm_dev_mgr.h>
49 #include <palacios/vmm_sprintf.h>
50 #include <palacios/vmm_lowlevel.h>
51 #include <palacios/vm_guest.h>
52 #include <palacios/vmm_symspy.h>
54 #include <devices/pci.h>
55 #include <devices/pci_types.h>
57 #include <interfaces/vmm_host_dev.h>
// When V3_CONFIG_DEBUG_PCI_FRONT is not defined, PrintDebug is defined with an
// empty expansion, so the PrintDebug calls in this file generate no code.
// NOTE(review): the matching #endif (and any #undef) is not visible in this
// excerpt.
60 #ifndef V3_CONFIG_DEBUG_PCI_FRONT
62 #define PrintDebug(fmt, args...)
66 // Our own address in PCI-land
// NOTE(review): only the closing braces of this packed definition are visible
// in this excerpt. Presumably it is the `union pci_addr_reg` stored in
// struct pci_front_internal, whose `value` and `reg` members are accessed
// later in this file -- confirm against the full source.
77 } __attribute__((packed));
78 } __attribute__((packed));
81 // identical to PCI passthrough device
// Kind of region a BAR slot describes. Besides the two enumerators visible
// here, this file also uses PT_BAR_IO, PT_BAR_MEM32, PT_BAR_MEM24,
// PT_BAR_MEM64_LO and PT_BAR_MEM64_HI (presumably declared on the lines of
// this enum that are not visible in this excerpt).
82 typedef enum { PT_BAR_NONE,
88 PT_EXP_ROM } pt_bar_type_t;
90 // identical to PCI passthrough device
95 /* We store 64 bit memory bar addresses in the high BAR
96 * because they are the last to be updated
97 * This means that the addr field must be 64 bits
// Per-instance state of one PCI front-end device.
107 struct pci_front_internal {
108 // this is our local cache of what the host device has
// (filled by pull_config() during pci_front_init; pci_bar_init reads the
// initial BAR values out of config_space)
110 uint8_t config_space[256];
111 struct pci_config_header real_hdr;
// NOTE(review): the line that opens the aggregate closed below is not visible
// in this excerpt -- presumably config_space and real_hdr overlay each other;
// confirm against the full source.
112 } __attribute__((packed));
114 // We do need a representation of the bars
115 // since we need to be made aware when they are written
116 // so that we can change the hooks.
118 // We assume here that the PCI subsystem, on a bar write
119 // will first send us a config_update, which we forward to
120 // the host dev. Then it will send us a bar update
121 // which we will use to rehook the device
123 struct pt_bar bars[6]; // our bars (for update purposes)
125 // Currently unsupported
127 //struct pt_bar exp_rom; // and exp ram areas of the config space, above
129 struct vm_device *pci_bus; // what bus we are attached to
130 struct pci_device *pci_dev; // our representation as a registered PCI device
132 union pci_addr_reg pci_addr; // our pci address
// Handle obtained from v3_host_dev_open(); the config, memory and port
// accesses in this file are forwarded through it.
136 v3_host_dev_t host_dev; // the actual implementation
// Sends the configuration image `config` to the host device, passing 0 and
// 256 as the offset/length arguments, and compares the result against 256.
// NOTE(review): the branch bodies (return values) are not visible in this
// excerpt. This calls v3_host_dev_config_write, whereas
// pci_front_config_update calls v3_host_dev_write_config -- confirm that the
// name used here exists in the host device interface.
142 static int push_config(struct pci_front_internal *state, uint8_t *config)
144 if (v3_host_dev_config_write(state->host_dev, 0, config, 256) != 256) {
// Fetches the configuration space from the host device into `config`, passing
// 0 and 256 as the offset/length arguments, and compares the result against
// 256. The caller in pci_front_init treats a nonzero return as failure.
// NOTE(review): the branch bodies (return values) are not visible in this
// excerpt.
152 static int pull_config(struct pci_front_internal *state, uint8_t *config)
154 if (v3_host_dev_read_config(state->host_dev, 0, config, 256) != 256) {
// Forwards a guest memory read to the host device: `len` bytes at guest
// physical address `gpa` are read into `dst` via v3_host_dev_read_mem(), and
// the bytes returned are dumped through PrintDebug.
// `priv` is the vm_device; its private_data is our pci_front_internal.
// NOTE(review): the full parameter list and the return statement are not
// visible in this excerpt. The debug calls name `info` while the visible
// parameter is `core` -- verify this builds with debugging enabled.
162 static int pci_front_read_mem(struct guest_info * core,
170 struct vm_device *dev = (struct vm_device *) priv;
171 struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data;
173 PrintDebug(info->vm_info, info, "pci_front (%s): reading 0x%x bytes from gpa 0x%p from host dev 0x%p ...",
174 state->name, len, (void*)gpa, state->host_dev);
176 rc = v3_host_dev_read_mem(state->host_dev, gpa, dst, len);
178 PrintDebug(info->vm_info, info, " done ... read %d bytes: 0x", rc);
// dump only the rc bytes the host device reported
180 for (i = 0; i < rc; i++) {
181 PrintDebug(info->vm_info, info, "%x", ((uint8_t *)dst)[i]);
184 PrintDebug(info->vm_info, info, "\n");
// Forwards a guest memory write to the host device: the `len` bytes in `src`
// are dumped through PrintDebug and then written at guest physical address
// `gpa` via v3_host_dev_write_mem().
// `priv` is the vm_device; its private_data is our pci_front_internal.
// NOTE(review): the full parameter list and the return statement are not
// visible in this excerpt. The debug calls name `info` while the visible
// parameter is `core` -- verify this builds with debugging enabled.
189 static int pci_front_write_mem(struct guest_info * core,
197 struct vm_device *dev = (struct vm_device *) priv;
198 struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data;
200 PrintDebug(info->vm_info, info, "pci_front (%s): writing 0x%x bytes to gpa 0x%p to host dev 0x%p bytes=0x",
201 state->name, len, (void*)gpa, state->host_dev);
// dump the bytes about to be written
203 for (i = 0; i < len; i++) {
204 PrintDebug(info->vm_info, info, "%x", ((uint8_t *)src)[i]);
207 rc = v3_host_dev_write_mem(state->host_dev, gpa, src, len);
209 PrintDebug(info->vm_info, info, " %d bytes written\n",rc);
// Forwards a guest I/O port read to the host device: `len` bytes are read
// from `port` into `dst` via v3_host_dev_read_io(), and the bytes returned
// are dumped through PrintDebug.
// Unlike the memory handlers, `priv_data` here is the pci_front_internal
// itself rather than the vm_device.
// NOTE(review): the full parameter list and the return statement are not
// visible in this excerpt. The debug calls name `info` while the visible
// parameter is `core` -- verify this builds with debugging enabled.
215 static int pci_front_read_port(struct guest_info * core,
222 struct pci_front_internal *state = (struct pci_front_internal *) priv_data;
224 PrintDebug(info->vm_info, info, "pci_front (%s): reading 0x%x bytes from port 0x%x from host dev 0x%p ...",
225 state->name, len, port, state->host_dev);
227 int rc = v3_host_dev_read_io(state->host_dev, port, dst, len);
229 PrintDebug(info->vm_info, info, " done ... read %d bytes: 0x", rc);
// dump only the rc bytes the host device reported
231 for (i = 0; i < rc; i++) {
232 PrintDebug(info->vm_info, info, "%x", ((uint8_t *)dst)[i]);
235 PrintDebug(info->vm_info, info, "\n");
// Forwards a guest I/O port write to the host device: the `len` bytes in
// `src` are dumped through PrintDebug and then written to `port` via
// v3_host_dev_write_io().
// `priv_data` is the pci_front_internal itself rather than the vm_device.
// NOTE(review): the full parameter list and the return statement are not
// visible in this excerpt. The debug calls name `info` while the visible
// parameter is `core` -- verify this builds with debugging enabled.
241 static int pci_front_write_port(struct guest_info * core,
248 struct pci_front_internal *state = (struct pci_front_internal *) priv_data;
250 PrintDebug(info->vm_info, info, "pci_front (%s): writing 0x%x bytes to port 0x%x to host dev 0x%p bytes=0x",
251 state->name, len, port, state->host_dev);
// dump the bytes about to be written
253 for (i = 0; i < len; i++) {
254 PrintDebug(info->vm_info, info, "%x", ((uint8_t *)src)[i]);
257 int rc = v3_host_dev_write_io(state->host_dev, port, src, len);
259 PrintDebug(info->vm_info, info, " %d bytes written\n",rc);
267 // This is called at registration time for the device
269 // We assume that someone has called pull_config to get a local
270 // copy of the config data from the host device by this point
//
// bar_num      - index of the BAR being initialized; indexes state->bars[]
//                and selects the BAR word in the cached config space
// dst          - NOTE(review): not referenced on any line visible in this
//                excerpt; presumably receives the BAR value handed back to
//                the PCI subsystem (see the final comment in this function)
// private_data - the vm_device; its private_data is our pci_front_internal
//
// The BAR value is read from the cached config space and classified:
//   - slot already marked PT_BAR_MEM64_HI: hook the whole 64 bit region
//   - (bar_val & 0x3) == 0x1:              I/O ports, hooked one at a time
//   - otherwise:                           memory BAR, typed by (bar_val & 0x6)
// NOTE(review): several lines (hook arguments, returns, closing braces) are
// not visible in this excerpt. The debug/error calls name `info`, which is not
// declared on any visible line -- verify against the full source.
272 static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) {
273 struct vm_device * dev = (struct vm_device *)private_data;
274 struct pci_front_internal * state = (struct pci_front_internal *)(dev->private_data);
277 const uint32_t bar_base_reg = 4; // offset in 32bit words to skip to the first bar
279 union pci_addr_reg pci_addr = {state->pci_addr.value}; // my address
281 uint32_t bar_val = 0;
282 uint32_t max_val = 0;
284 struct pt_bar * pbar = &(state->bars[bar_num]);
286 pci_addr.reg = bar_base_reg + bar_num;
288 PrintDebug(info->vm_info, info, "pci_front (%s): pci_bar_init: PCI Address = 0x%x\n", state->name, pci_addr.value);
290 // This assumes that pull_config() has been previously called and
291 // we have a local copy of the host device's configuration space
// (bar_base_reg+bar_num)*4 is the byte offset of this BAR within config_space
292 bar_val = *((uint32_t*)(&(state->config_space[(bar_base_reg+bar_num)*4])));
294 // Now let's set our copy of the relevant bar accordingly
297 // Now we will configure the hooks relevant to this bar
299 // We preset this type when we encounter a MEM64 Low BAR
300 // This is a 64 bit memory region that we turn into a memory hook
301 if (pbar->type == PT_BAR_MEM64_HI) {
// the low half of the pair lives in the preceding BAR slot
302 struct pt_bar * lo_pbar = &(state->bars[bar_num - 1]);
304 max_val = PCI_MEM64_MASK_HI;
306 pbar->size += lo_pbar->size;
308 PrintDebug(info->vm_info, info, "pci_front (%s): pci_bar_init: Adding 64 bit PCI mem region: start=0x%p, end=0x%p as a full hook\n",
310 (void *)(addr_t)pbar->addr,
311 (void *)(addr_t)(pbar->addr + pbar->size));
313 if (v3_hook_full_mem(dev->vm,
316 pbar->addr+pbar->size-1,
321 PrintError(info->vm_info, info, "pci_front (%s): pci_bar_init: failed to hook 64 bit region (0x%p, 0x%p)\n",
323 (void *)(addr_t)pbar->addr,
324 (void *)(addr_t)(pbar->addr + pbar->size - 1));
328 } else if ((bar_val & 0x3) == 0x1) {
329 // This is an I/O port region which we will turn into a range of hooks
333 pbar->type = PT_BAR_IO;
334 pbar->addr = PCI_IO_BASE(bar_val);
336 max_val = bar_val | PCI_IO_MASK;
338 pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1;
341 PrintDebug(info->vm_info, info, "pci_front (%s): pci_bar_init: hooking ports 0x%x through 0x%x\n",
342 state->name, (uint32_t)pbar->addr, (uint32_t)pbar->addr + pbar->size - 1);
// one hook per port in [addr, addr+size)
344 for (i = 0; i < pbar->size; i++) {
345 if (v3_dev_hook_io(dev,
348 pci_front_write_port)<0) {
349 PrintError(info->vm_info, info, "pci_front (%s): pci_bar_init: unabled to hook I/O port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
356 // might be a 32 bit memory region or an empty bar
358 max_val = bar_val | PCI_MEM_MASK;
361 // nothing, so just ignore it
362 pbar->type = PT_BAR_NONE;
365 // memory region - hook it
// (bar_val & 0x6) selects the memory BAR flavour handled below:
// 0x0 -> 32 bit, 0x2 -> 24 bit, 0x4 -> low half of a 64 bit pair
367 if ((bar_val & 0x6) == 0x0) {
368 // 32 bit memory region
370 pbar->type = PT_BAR_MEM32;
371 pbar->addr = PCI_MEM32_BASE(bar_val);
372 pbar->size = ~PCI_MEM32_BASE(max_val) + 1;
374 PrintDebug(info->vm_info, info, "pci_front (%s): pci_init_bar: adding 32 bit PCI mem region: start=0x%p, end=0x%p\n",
376 (void *)(addr_t)pbar->addr,
377 (void *)(addr_t)(pbar->addr + pbar->size));
379 if (v3_hook_full_mem(dev->vm,
382 pbar->addr+pbar->size-1,
386 PrintError(info->vm_info, info, "pci_front (%s): pci_init_bar: unable to hook 32 bit memory region 0x%p to 0x%p\n",
387 state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
391 } else if ((bar_val & 0x6) == 0x2) {
393 // 24 bit memory region
// NOTE(review): pci_bar_write and unhook_all_mem show no PT_BAR_MEM24 branch
// on their visible lines -- confirm how such a BAR is later updated/unhooked.
395 pbar->type = PT_BAR_MEM24;
396 pbar->addr = PCI_MEM24_BASE(bar_val);
397 pbar->size = ~PCI_MEM24_BASE(max_val) + 1;
400 if (v3_hook_full_mem(dev->vm,
403 pbar->addr+pbar->size-1,
407 PrintError(info->vm_info, info, "pci_front (%s): pci_init_bar: unable to hook 24 bit memory region 0x%p to 0x%p\n",
408 state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
412 } else if ((bar_val & 0x6) == 0x4) {
414 // partial update of a 64 bit region, no hook done yet
// mark the next slot as the high half so that its own bar_init call takes the
// PT_BAR_MEM64_HI path at the top of this function
416 struct pt_bar * hi_pbar = &(state->bars[bar_num + 1]);
418 pbar->type = PT_BAR_MEM64_LO;
419 hi_pbar->type = PT_BAR_MEM64_HI;
421 // Set the low bits, only for temporary storage until we calculate the high BAR
422 pbar->addr = PCI_MEM64_BASE_LO(bar_val);
423 pbar->size = ~PCI_MEM64_BASE_LO(max_val) + 1;
425 PrintDebug(info->vm_info, info, "pci_front (%s): pci_bar_init: partial 64 bit update\n",state->name);
428 PrintError(info->vm_info, info, "pci_front (%s): pci_bar_init: invalid memory bar type\n",state->name);
437 // Update the pci subsystem versions
445 // If the guest modifies a BAR, we expect that pci.c will do the following,
448 // 1. notify us via the config_update callback, which we will feed back
449 // to the host device
450 // 2. notify us of the bar change via the following callback
452 // This callback will unhook as needed for the old bar value and rehook
453 // as needed for the new bar value
//
// bar_num      - index of the BAR being written; indexes state->bars[]
// src          - in/out: the value the guest wrote; it is "cooked" in place
//                (low bits cleared to the BAR size, flag bits restored from
//                pbar->val) before the new base address is derived from it
// private_data - the vm_device; its private_data is our pci_front_internal
//
// NOTE(review): no branch for PT_BAR_MEM24 is visible here although
// pci_bar_init can assign that type; such a BAR would appear to reach the
// final "unhandled PCI bar type" error -- confirm against the full source.
// Return statements and several call arguments are not visible in this
// excerpt, and the debug/error calls name `info`, which is not declared on
// any visible line.
455 static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) {
456 struct vm_device * dev = (struct vm_device *)private_data;
457 struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
459 struct pt_bar * pbar = &(state->bars[bar_num]);
461 PrintDebug(info->vm_info, info, "pci_front (%s): bar update: bar_num=%d, src=0x%x\n", state->name, bar_num, *src);
462 PrintDebug(info->vm_info, info, "pci_front (%s): the current bar has size=%u, type=%d, addr=%p, val=0x%x\n",
463 state->name, pbar->size, pbar->type, (void *)(addr_t)pbar->addr, pbar->val);
467 if (pbar->type == PT_BAR_NONE) {
468 PrintDebug(info->vm_info, info, "pci_front (%s): bar update is to empty bar - ignored\n",state->name);
470 } else if (pbar->type == PT_BAR_IO) {
// I/O BAR: drop every port hook of the old range, then hook the new range
474 PrintDebug(info->vm_info, info, "pci_front (%s): unhooking I/O ports 0x%x through 0x%x\n",
476 (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
477 for (i = 0; i < pbar->size; i++) {
478 if (v3_dev_unhook_io(dev, pbar->addr + i) == -1) {
479 PrintError(info->vm_info, info, "pci_front (%s): could not unhook previously hooked port.... 0x%x\n",
481 (uint32_t)pbar->addr + i);
486 PrintDebug(info->vm_info, info, "pci_front (%s): setting I/O Port range size=%d\n", state->name, pbar->size);
489 // Not clear if this cooking is needed... why not trust
490 // the write? Who cares if it wants to suddenly hook more ports?
493 // clear the low bits to match the size
494 *src &= ~(pbar->size - 1);
// restore the non-address bits from the stored BAR value
497 *src |= (pbar->val & ~PCI_IO_MASK);
499 pbar->addr = PCI_IO_BASE(*src);
501 PrintDebug(info->vm_info, info, "pci_front (%s): cooked src=0x%x\n", state->name, *src);
503 PrintDebug(info->vm_info, info, "pci_front (%s): rehooking I/O ports 0x%x through 0x%x\n",
504 state->name, (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
506 for (i = 0; i < pbar->size; i++) {
507 if (v3_dev_hook_io(dev,
510 pci_front_write_port)<0) {
511 PrintError(info->vm_info, info, "pci_front (%s): unable to rehook port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
516 } else if (pbar->type == PT_BAR_MEM32) {
// 32 bit memory BAR: unhook the old region by its start address, then
// hook the region at the new base with the unchanged size
518 if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
519 PrintError(info->vm_info, info, "pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n",
520 state->name, (void*)(pbar->addr));
524 // Again, not sure I need to do this cooking...
526 // clear the low bits to match the size
527 *src &= ~(pbar->size - 1);
530 *src |= (pbar->val & ~PCI_MEM_MASK);
532 PrintDebug(info->vm_info, info, "pci_front (%s): cooked src=0x%x\n", state->name, *src);
534 pbar->addr = PCI_MEM32_BASE(*src);
536 PrintDebug(info->vm_info, info, "pci_front (%s): rehooking 32 bit memory region 0x%p through 0x%p\n",
537 state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
539 if (v3_hook_full_mem(dev->vm,
542 pbar->addr+pbar->size-1,
546 PrintError(info->vm_info, info, "pci_front (%s): unable to rehook 32 bit memory region 0x%p through 0x%p\n",
547 state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
551 } else if (pbar->type == PT_BAR_MEM64_LO) {
552 // We only store the written values here, the actual reconfig comes when the high BAR is updated
554 // clear the low bits to match the size
555 *src &= ~(pbar->size - 1);
558 *src |= (pbar->val & ~PCI_MEM_MASK);
560 // Temp storage, used when hi bar is written
561 pbar->addr = PCI_MEM64_BASE_LO(*src);
563 PrintDebug(info->vm_info, info, "pci_front (%s): handled partial update for 64 bit memory region\n",state->name);
565 } else if (pbar->type == PT_BAR_MEM64_HI) {
// high half of a 64 bit BAR: the low half was stashed in the preceding slot
566 struct pt_bar * lo_vbar = &(state->bars[bar_num - 1]);
568 if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
569 PrintError(info->vm_info, info, "pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n",
570 state->name, (void*)(pbar->addr));
575 // We don't set size, because we assume region is less than 4GB
578 *src |= (pbar->val & ~PCI_MEM64_MASK_HI);
580 pbar->addr = PCI_MEM64_BASE_HI(*src);
// combine with the low-half address saved by the PT_BAR_MEM64_LO branch
582 pbar->addr += lo_vbar->addr;
584 PrintDebug(info->vm_info, info, "pci_front (%s): rehooking 64 bit memory region 0x%p through 0x%p\n",
585 state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
587 if (v3_hook_full_mem(dev->vm,
590 pbar->addr+pbar->size-1,
594 PrintError(info->vm_info, info, "pci_front (%s): unable to rehook 64 bit memory region 0x%p through 0x%p\n",
595 state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
600 PrintError(info->vm_info, info, "pci_front (%s): unhandled PCI bar type %d\n", state->name, pbar->type);
// Config-space write callback registered with the PCI bus in
// setup_virt_pci_dev(): dumps the `length` bytes in `src` through PrintDebug
// and forwards the write to the host device via v3_host_dev_write_config().
// private_data is the vm_device; its private_data is our pci_front_internal.
// NOTE(review): the arguments of the forwarding call and the return
// statements are not visible in this excerpt.
610 static int pci_front_config_update(struct pci_device *pci_dev, uint_t reg_num, void * src, uint_t length, void * private_data)
613 struct vm_device * dev = (struct vm_device *)private_data;
614 struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
615 union pci_addr_reg pci_addr = {state->pci_addr.value};
// reg_num >> 2: presumably converts a byte offset into a 32 bit register index
617 pci_addr.reg = reg_num >> 2;
619 PrintDebug(info->vm_info, info, "pci_front (%s): configuration update: writing 0x%x bytes at offset 0x%x to host device 0x%p, bytes=0x",
620 state->name, length, pci_addr.value, state->host_dev);
622 for (i = 0; i < length; i++) {
623 PrintDebug(info->vm_info, info, "%x", ((uint8_t *)src)[i]);
626 PrintDebug(info->vm_info, info, "\n");
628 if (v3_host_dev_write_config(state->host_dev,
632 PrintError(info->vm_info, info, "pci_front (%s): configuration update: unable to write all bytes\n",state->name);
// Walks all six BAR slots and removes the memory hook of every
// PT_BAR_MEM32 and PT_BAR_MEM64_HI region, using the VM of the PCI bus the
// device is attached to. pci_front_free treats a negative return as failure.
// NOTE(review): PT_BAR_MEM24 regions, which pci_bar_init can hook, are not
// unhooked on any visible line -- confirm against the full source. The
// return statements are not visible in this excerpt.
641 static int unhook_all_mem(struct pci_front_internal *state)
644 struct vm_device *bus = state->pci_bus;
647 for (bar_num=0;bar_num<6;bar_num++) {
648 struct pt_bar * pbar = &(state->bars[bar_num]);
650 PrintDebug(info->vm_info, info, "pci_front (%s): unhooking for bar %d\n", state->name, bar_num);
652 if (pbar->type == PT_BAR_MEM32) {
653 if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
654 PrintError(info->vm_info, info, "pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n",
655 state->name, (void*)(pbar->addr));
658 } else if (pbar->type == PT_BAR_MEM64_HI) {
// the HI slot holds the combined address of the 64 bit region
660 if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
661 PrintError(info->vm_info, info, "pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n",
662 state->name, (void*)(pbar->addr));
// Registers this device with its PCI bus. All six BARs are declared as
// PCI_BAR_PASSTHROUGH with pci_bar_init / pci_bar_write as callbacks and the
// vm_device as their private data; pci_front_config_update is passed as the
// config-write callback. The resulting pci_device is stored in
// state->pci_dev. pci_front_init treats a negative return as failure.
// NOTE(review): most arguments of v3_pci_register_device and the error
// handling/returns are not visible in this excerpt.
673 static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev)
675 struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
676 struct pci_device * pci_dev = NULL;
677 struct v3_pci_bar bars[6];
681 for (i = 0; i < 6; i++) {
682 bars[i].type = PCI_BAR_PASSTHROUGH;
683 bars[i].private_data = dev;
684 bars[i].bar_init = pci_bar_init;
685 bars[i].bar_write = pci_bar_write;
688 pci_dev = v3_pci_register_device(state->pci_bus,
692 pci_front_config_update,
693 NULL, // no support for config reads
694 NULL, // no support for command updates
695 NULL, // no support for expansion roms
699 state->pci_dev = pci_dev;
702 // EXPANSION ROMS CURRENTLY UNSUPPORTED
704 // COMMANDS CURRENTLY UNSUPPORTED
712 // Note: potential bug: not clear what pointer I get here
// Tear-down routine installed as dev_ops.free: removes the memory hooks via
// unhook_all_mem() and closes the host device handle if one was opened.
// NOTE(review): the failure handling after unhook_all_mem, any freeing of
// `state` itself, and the return statements are not visible in this excerpt.
714 static int pci_front_free(struct pci_front_internal *state)
717 if (unhook_all_mem(state)<0) {
721 // the device manager will unhook the i/o ports for us
723 if (state->host_dev) {
724 v3_host_dev_close(state->host_dev);
731 PrintDebug(info->vm_info, info, "pci_front (%s): freed\n",state->name);
// Device operations table passed to v3_add_device() in pci_front_init.
// Only the .free entry is visible in this excerpt.
737 static struct v3_device_ops dev_ops = {
739 // Note: potential bug: not clear what pointer I get here
// pci_front_free takes a struct pci_front_internal *, hence the cast to the
// generic void * callback signature.
741 .free = (int (*)(void*))pci_front_free,
// Device constructor registered under "PCI_FRONT".
//
// Reads three values from the configuration tree -- "ID" (device name),
// "bus" (name of the PCI bus device to attach to) and "hostdev" (URL of the
// host device implementation) -- then:
//   1. looks up the bus device and allocates zeroed per-device state
//   2. adds the device to the VM with dev_ops
//   3. opens the host device (V3_BUS_CLASS_PCI)
//   4. pulls the host device's config space into state->config_space
//   5. registers the virtual PCI device, which triggers the bar_init callbacks
// After step 2 every failure path calls v3_remove_device().
// NOTE(review): the return statements and some local declarations are not
// visible in this excerpt, and the error calls name `info`, which is not
// declared on any visible line.
750 static int pci_front_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg)
752 struct vm_device * dev;
753 struct vm_device * bus;
754 struct pci_front_internal *state;
760 if (!(dev_id = v3_cfg_val(cfg, "ID"))) {
761 PrintError(info->vm_info, info, "pci_front: no id given!\n");
765 if (!(bus_id = v3_cfg_val(cfg, "bus"))) {
766 PrintError(info->vm_info, info, "pci_front (%s): no bus given!\n",dev_id);
770 if (!(url = v3_cfg_val(cfg, "hostdev"))) {
771 PrintError(info->vm_info, info, "pci_front (%s): no host device url given!\n",dev_id);
775 if (!(bus = v3_find_dev(vm,bus_id))) {
776 PrintError(info->vm_info, info, "pci_front (%s): cannot attach to bus %s\n",dev_id,bus_id);
780 if (!(state = V3_Malloc(sizeof(struct pci_front_internal)))) {
781 PrintError(info->vm_info, info, "pci_front (%s): cannot allocate state for device\n",dev_id);
785 memset(state, 0, sizeof(struct pci_front_internal));
787 state->pci_bus = bus;
// NOTE(review): strncpy does not NUL-terminate when dev_id has 32 or more
// characters; the size of state->name is not visible here -- confirm the
// buffer is larger than 32 bytes (the memset above zeroes it) or bound the
// copy to size-1.
788 strncpy(state->name, dev_id, 32);
790 if (!(dev = v3_add_device(vm, dev_id, &dev_ops, state))) {
791 PrintError(info->vm_info, info, "pci_front (%s): unable to add device\n",state->name);
795 if (!(state->host_dev=v3_host_dev_open(url,V3_BUS_CLASS_PCI,dev,vm))) {
796 PrintError(info->vm_info, info, "pci_front (%s): unable to attach to host device %s\n",state->name, url);
797 v3_remove_device(dev);
801 // fetch config space from the host
802 if (pull_config(state,state->config_space)) {
803 PrintError(info->vm_info, info, "pci_front (%s): cannot initially configure device\n",state->name);
804 v3_remove_device(dev);
808 // setup virtual device for now
809 if (setup_virt_pci_dev(vm,dev)<0) {
810 PrintError(info->vm_info, info, "pci_front (%s): cannot set up virtual pci device\n", state->name);
811 v3_remove_device(dev);
815 // We do not need to hook anything here since pci will call
816 // us back via the bar_init functions
818 PrintDebug(info->vm_info, info, "pci_front (%s): inited and ready to be Potemkinized\n",state->name);
// Associates the device type name "PCI_FRONT" with pci_front_init
// (presumably so the device manager can instantiate it from a VM
// configuration -- device_register is defined outside this file).
825 device_register("PCI_FRONT", pci_front_init)