From 8295b7b4d465653626864ee058231f4273a23d4b Mon Sep 17 00:00:00 2001 From: Alexander Kudryavtsev Date: Fri, 23 Sep 2011 23:02:19 +0400 Subject: [PATCH 02/32] PCI Capabilities passthrough support --- palacios/include/devices/pci.h | 23 ++- palacios/src/devices/pci.c | 136 +++++++++--- palacios/src/devices/pci_passthrough.c | 400 +++++++++++++++++++++++++++----- palacios/src/devices/piix3.c | 7 +- 4 files changed, 466 insertions(+), 100 deletions(-) diff --git a/palacios/include/devices/pci.h b/palacios/include/devices/pci.h index 46864fd..dc0c665 100644 --- a/palacios/include/devices/pci.h +++ b/palacios/include/devices/pci.h @@ -146,6 +146,9 @@ struct pci_device { int exp_rom_update_flag; int bar_update_flag; + + int is_passthrough; // true if this device is real device passed through into guest + void * priv_data; }; @@ -173,15 +176,17 @@ v3_pci_register_device(struct vm_device * pci, void * priv_data); -struct pci_device * -v3_pci_register_passthrough_device(struct vm_device * pci, - int bus_num, - int dev_num, - int fn_num, - const char * name, - int (*config_write)(uint_t reg_num, void * src, uint_t length, void * private_data), - int (*config_read)(uint_t reg_num, void * dst, uint_t length, void * private_data), - void * private_data); +struct pci_device * v3_pci_register_passthrough_device(struct vm_device * pci, + pci_device_type_t dev_type, + int bus_num, + int dev_num, + int fn_num, + const char * name, + struct v3_pci_bar * bars, + int (*config_write)(uint_t reg_num, void * src, uint_t length, void * priv_data), + int (*config_read)(uint_t reg_num, void * src, uint_t length, void * priv_data), + int (*exp_rom_update)(struct pci_device * pci_dev, uint32_t * src, void * priv_data), + void * priv_data); #endif diff --git a/palacios/src/devices/pci.c b/palacios/src/devices/pci.c index 0400751..8ddaba4 100644 --- a/palacios/src/devices/pci.c +++ b/palacios/src/devices/pci.c @@ -328,14 +328,26 @@ static int data_port_read(struct guest_info * core, ushort_t port, void * dst, u return length; } - if (pci_dev->type == PCI_PASSTHROUGH) { - if (pci_dev->config_read(reg_num, dst, length, pci_dev->priv_data) == -1) { - PrintError("Failed to handle configuration update for passthrough pci_device\n"); - return -1; - } - - return 0; + + + /*if (pci_dev->type == PCI_PASSTHROUGH) { + if (pci_dev->config_read(reg_num, dst, length, pci_dev->priv_data) == -1) { + PrintError("Failed to handle configuration update for passthrough pci_device\n"); + return -1; + } + + return 0; + }*/ + + if (pci_dev->config_read) { + if (pci_dev->config_read(reg_num, dst, length, pci_dev->priv_data) == -1) { + PrintError("Failed to handle configuration read for passthrough pci device\n"); + return -1; + } + return length; } + + for (i = 0; i < length; i++) { *(uint8_t *)((uint8_t *)dst + i) = pci_dev->config_space[reg_num + i]; @@ -553,7 +565,10 @@ static int data_port_write(struct guest_info * core, ushort_t port, void * src, } if (pci_dev->config_update) { - pci_dev->config_update(reg_num, src, length, pci_dev->priv_data); + if(pci_dev->config_update(reg_num, src, length, pci_dev->priv_data)) { + PrintError("PCI: config_update returned error.\n"); + return -1; + } } // Scan for BAR updated @@ -933,59 +948,122 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, // if dev_num == -1, auto assign struct pci_device * v3_pci_register_passthrough_device(struct vm_device * pci, - int bus_num, - int dev_num, - int fn_num, - const char * name, - int (*config_write)(uint_t reg_num, void * src, uint_t length, void * private_data), - int (*config_read)(uint_t reg_num, void * dst, uint_t length, void * private_data), - void * private_data) { + pci_device_type_t dev_type, + int bus_num, + int dev_num, + int fn_num, + const char * name, + struct v3_pci_bar * bars, + int (*config_write)(uint_t reg_num, void * src, uint_t length, void * priv_data), + int (*config_read)(uint_t reg_num, void * src, uint_t length, void * priv_data), + int (*exp_rom_update)(struct pci_device * pci_dev, uint32_t * src, void * priv_data), + void * priv_data) { struct pci_internal * pci_state = (struct pci_internal *)pci->private_data; struct pci_bus * bus = &(pci_state->bus_list[bus_num]); struct pci_device * pci_dev = NULL; + int i; if (dev_num > MAX_BUS_DEVICES) { - PrintError("Requested Invalid device number (%d)\n", dev_num); - return NULL; + PrintError("Requested Invalid device number (%d)\n", dev_num); + return NULL; } if (dev_num == PCI_AUTO_DEV_NUM) { - PrintDebug("Searching for free device number\n"); - if ((dev_num = get_free_dev_num(bus)) == -1) { - PrintError("No more available PCI slots on bus %d\n", bus->bus_num); - return NULL; - } + PrintDebug("Searching for free device number\n"); + if ((dev_num = get_free_dev_num(bus)) == -1) { + PrintError("No more available PCI slots on bus %d\n", bus->bus_num); + return NULL; + } } PrintDebug("Checking for PCI Device\n"); if (get_device(bus, dev_num, fn_num) != NULL) { - PrintError("PCI Device already registered at slot %d on bus %d\n", - dev_num, bus->bus_num); - return NULL; + PrintError("PCI Device already registered at slot %d on bus %d\n", + dev_num, bus->bus_num); + return NULL; } pci_dev = (struct pci_device *)V3_Malloc(sizeof(struct pci_device)); if (pci_dev == NULL) { - PrintError("Could not allocate pci device\n"); - return NULL; + PrintError("Could not allocate pci device\n"); + return NULL; } memset(pci_dev, 0, sizeof(struct pci_device)); + + + pci_dev->type = dev_type; + switch (pci_dev->type) { + case PCI_STD_DEVICE: + pci_dev->config_header.header_type = 0x00; + break; + case PCI_MULTIFUNCTION: + pci_dev->config_header.header_type = 0x80; + break; + default: + PrintError("Unhandled PCI Device Type: %d\n", dev_type); + return NULL; + } + + + pci_dev->bus_num = bus_num; pci_dev->dev_num = dev_num; pci_dev->fn_num = fn_num; strncpy(pci_dev->name, name, sizeof(pci_dev->name)); - pci_dev->priv_data = private_data; + pci_dev->priv_data = priv_data; // register update callbacks - pci_dev->config_write = config_write; + pci_dev->config_update = config_write; pci_dev->config_read = config_read; + pci_dev->exp_rom_update = exp_rom_update; + + + //copy bars + for (i = 0; i < 6; i ++) { + pci_dev->bar[i].type = bars[i].type; + pci_dev->bar[i].private_data = bars[i].private_data; + + if (pci_dev->bar[i].type == PCI_BAR_IO) { + pci_dev->bar[i].num_ports = bars[i].num_ports; + + // This is a horrible HACK becaues the BIOS is supposed to set the PCI base ports + // And if the BIOS doesn't, Linux just happily overlaps device port assignments + if (bars[i].default_base_port != (uint16_t)-1) { + pci_dev->bar[i].default_base_port = bars[i].default_base_port; + } else { + pci_dev->bar[i].default_base_port = pci_state->dev_io_base; + pci_state->dev_io_base += ( 0x100 * ((bars[i].num_ports / 0x100) + 1) ); + } + + pci_dev->bar[i].io_read = bars[i].io_read; + pci_dev->bar[i].io_write = bars[i].io_write; + } else if (pci_dev->bar[i].type == PCI_BAR_MEM32) { + pci_dev->bar[i].num_pages = bars[i].num_pages; + pci_dev->bar[i].default_base_addr = bars[i].default_base_addr; + pci_dev->bar[i].mem_read = bars[i].mem_read; + pci_dev->bar[i].mem_write = bars[i].mem_write; + } else if (pci_dev->bar[i].type == PCI_BAR_PASSTHROUGH) { + pci_dev->bar[i].bar_init = bars[i].bar_init; + pci_dev->bar[i].bar_write = bars[i].bar_write; + } else { + pci_dev->bar[i].num_pages = 0; + pci_dev->bar[i].default_base_addr = 0; + pci_dev->bar[i].mem_read = NULL; + pci_dev->bar[i].mem_write = NULL; + } + } + + if (init_bars(pci->vm, pci_dev) == -1) { + PrintError("could not initialize bar registers\n"); + return NULL; + } // add the device add_device_to_bus(bus, pci_dev); diff --git a/palacios/src/devices/pci_passthrough.c b/palacios/src/devices/pci_passthrough.c index b8f921b..050cd02 100644 --- a/palacios/src/devices/pci_passthrough.c +++ b/palacios/src/devices/pci_passthrough.c @@ -41,11 +41,14 @@ #include #include + +#define MAX_PASSTHROUGH_DEVICES 64 + // Hardcoded... Are these standard?? #define PCI_CFG_ADDR 0xcf8 #define PCI_CFG_DATA 0xcfc -#define PCI_BUS_MAX 7 +#define PCI_BUS_MAX 256 #define PCI_DEV_MAX 32 #define PCI_FN_MAX 7 @@ -56,6 +59,20 @@ #define PCI_HDR_SIZE 256 +#define PCI_STATUS_CAP_LIST 0x10 + +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CB_CAPABILITY_LIST 0x14 + +#define PCI_CAP_LIST_NEXT 1 +#define PCI_CAP_ID_MSI 0x05 +#define PCI_CAP_ID_MSIX 0x11 + + union pci_addr_reg { uint32_t value; struct { @@ -92,7 +109,13 @@ struct pt_bar { }; - +struct cfg_range_hook { + uint_t start; + uint_t len; + int (*read)(struct vm_device *dev, uint_t reg_num, void* ptr, uint_t length); + int (*write)(struct vm_device *dev, uint_t reg_num, void* ptr, uint_t length); + struct list_head list; +}; struct pt_dev_state { union { @@ -111,52 +134,63 @@ struct pt_dev_state { union pci_addr_reg phys_pci_addr; + // ranges which should never be accessed in real PCI config space, only virtual copy. For example, excluded capabilities. + struct list_head cfg_virtual_ranges; + char name[32]; }; -static inline uint32_t pci_cfg_read32(uint32_t addr) { - v3_outdw(PCI_CFG_ADDR, addr); - return v3_indw(PCI_CFG_DATA); -} - - - -static inline void pci_cfg_write32(uint32_t addr, uint32_t val) { - v3_outdw(PCI_CFG_ADDR, addr); - v3_outdw(PCI_CFG_DATA, val); -} -static inline uint16_t pci_cfg_read16(uint32_t addr) { - v3_outw(PCI_CFG_ADDR, addr); - return v3_inw(PCI_CFG_DATA); -} - - - -static inline void pci_cfg_write16(uint32_t addr, uint16_t val) { - v3_outw(PCI_CFG_ADDR, addr); - v3_outw(PCI_CFG_DATA, val); -} - - +static int pci_cfg_read(uint32_t addr, int len, uint32_t *value) +{ + if((len == 2 && (addr & 1)) || (len == 4 && (addr & 3)) || (len != 1 && len != 2 && len!= 4)) { + PrintError("Bad configuration space read access: address %08x, length %d\n", addr, len); + return -1; + } -static inline uint8_t pci_cfg_read8(uint32_t addr) { - v3_outb(PCI_CFG_ADDR, addr); - return v3_inb(PCI_CFG_DATA); + v3_outdw(PCI_CFG_ADDR, addr & 0xfffffffc); + + switch (len) { + case 1: + *(uint8_t*)value = v3_inb(PCI_CFG_DATA + (addr & 3)); + break; + case 2: + *(uint16_t*)value = v3_inw(PCI_CFG_DATA + (addr & 2)); + break; + case 4: + *value = v3_indw(PCI_CFG_DATA); + break; + } + return 0; } +static inline int pci_cfg_write(uint32_t addr, int len, uint32_t value) +{ + if((len == 2 && (addr & 1)) || (len == 4 && (addr & 3)) || (len != 1 && len != 2 && len!= 4)) { + PrintError("Bad configuration space write access: address %08x, length %d\n", addr, len); + return -1; + } + v3_outdw(PCI_CFG_ADDR, addr & 0xfffffffc); + + switch (len) { + case 1: + v3_outb(PCI_CFG_DATA + (addr & 3), (uint8_t)value); + break; + case 2: + v3_outw(PCI_CFG_DATA + (addr & 2), (uint16_t)value); + break; + case 4: + v3_outdw(PCI_CFG_DATA, (uint32_t)value); + break; + } -static inline void pci_cfg_write8(uint32_t addr, uint8_t val) { - v3_outb(PCI_CFG_ADDR, addr); - v3_outb(PCI_CFG_DATA, val); + return 0; } - - static int pci_exp_rom_init(struct vm_device * dev, struct pt_dev_state * state) { struct pci_device * pci_dev = state->pci_dev; const uint32_t exp_rom_base_reg = 12; @@ -165,11 +199,12 @@ static int pci_exp_rom_init(struct vm_device * dev, struct pt_dev_state * state) uint32_t rom_val = 0; struct pt_bar * prom = &(state->phys_exp_rom); struct pt_bar * vrom = &(state->virt_exp_rom); + int status = 0; // should read from cached header pci_addr.reg = exp_rom_base_reg; - rom_val = pci_cfg_read32(pci_addr.value); + status |= pci_cfg_read(pci_addr.value, 4, &rom_val); prom->val = rom_val; prom->type = PT_EXP_ROM; @@ -182,10 +217,15 @@ static int pci_exp_rom_init(struct vm_device * dev, struct pt_dev_state * state) // What we probably want to do is write a 0 to the command register //irq_state = v3_irq_save(); - pci_cfg_write32(pci_addr.value, max_val); - max_val = pci_cfg_read32(pci_addr.value); - pci_cfg_write32(pci_addr.value, rom_val); - + status |= pci_cfg_write(pci_addr.value, 4, max_val); + status |= pci_cfg_read(pci_addr.value, 4, &max_val); + status |= pci_cfg_write(pci_addr.value, 4, rom_val); + if(status) { + PrintError("Failed to update EXP ROM base address value.\n"); + return -1; + } + + //v3_irq_restore(irq_state); prom->type = PT_EXP_ROM; @@ -233,6 +273,7 @@ static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { union pci_addr_reg pci_addr = {state->phys_pci_addr.value}; uint32_t bar_val = 0; uint32_t max_val = 0; + int status = 0; //addr_t irq_state = 0; struct pt_bar * pbar = &(state->phys_bars[bar_num]); @@ -241,7 +282,10 @@ static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { PrintDebug("PCI Address = 0x%x\n", pci_addr.value); - bar_val = pci_cfg_read32(pci_addr.value); + if(pci_cfg_read(pci_addr.value, 4, &bar_val)) { + PrintError("Failed to read BAR value from config space.\n"); + return -1; + } pbar->val = bar_val; // We preset this type when we encounter a MEM64 Low BAR @@ -250,9 +294,14 @@ static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { max_val = PCI_MEM64_MASK_HI; - pci_cfg_write32(pci_addr.value, max_val); - max_val = pci_cfg_read32(pci_addr.value); - pci_cfg_write32(pci_addr.value, bar_val); + status |= pci_cfg_write(pci_addr.value, 4, max_val); + status |= pci_cfg_read(pci_addr.value, 4, &max_val); + status |= pci_cfg_write(pci_addr.value, 4, bar_val); + + if(status) { + PrintError("Failed to update BAR value for 64 bit PCI mem region.\n"); + return -1; + } pbar->addr = PCI_MEM64_BASE_HI(bar_val); pbar->addr <<= 32; @@ -292,9 +341,14 @@ static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { // What we probably want to do is write a 0 to the command register //irq_state = v3_irq_save(); - pci_cfg_write32(pci_addr.value, max_val); - max_val = pci_cfg_read32(pci_addr.value); - pci_cfg_write32(pci_addr.value, bar_val); + status |= pci_cfg_write(pci_addr.value, 4, max_val); + status |= pci_cfg_read(pci_addr.value, 4, &max_val); + status |= pci_cfg_write(pci_addr.value, 4, bar_val); + + if(status) { + PrintError("Failed to update BAR value for PCI IO region.\n"); + return -1; + } //v3_irq_restore(irq_state); @@ -324,9 +378,14 @@ static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { // What we probably want to do is write a 0 to the command register //irq_state = v3_irq_save(); - pci_cfg_write32(pci_addr.value, max_val); - max_val = pci_cfg_read32(pci_addr.value); - pci_cfg_write32(pci_addr.value, bar_val); + status |= pci_cfg_write(pci_addr.value, 4, max_val); + status |= pci_cfg_read(pci_addr.value, 4, &max_val); + status |= pci_cfg_write(pci_addr.value, 4, bar_val); + + if(status) { + PrintError("Failed to update BAR value for PCI mem region.\n"); + return -1; + } //v3_irq_restore(irq_state); @@ -587,6 +646,95 @@ static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) { return 0; } +// find capability position and position of pointer to that capability. +static int pci_find_capability_and_pointer(struct pci_device *dev, int cap, uint8_t *ptr_pos) { + if(!(dev->config_header.status & PCI_STATUS_CAP_LIST)) { + return 0; // No capabilities + } + + uint8_t pos = PCI_CAPABILITY_LIST, ppos; + switch(dev->config_header.header_type & 0x7f) { + case PCI_HEADER_TYPE_NORMAL: + case PCI_HEADER_TYPE_BRIDGE: + pos = PCI_CAPABILITY_LIST; + break; + case PCI_HEADER_TYPE_CARDBUS: + pos = PCI_CB_CAPABILITY_LIST; + break; + default: + return 0; + } + + int ttl = 48; + uint8_t val; + + while (ttl--) { + + ppos = pos; + pos = dev->config_space[pos]; // read next offset + if (pos < 0x40) + break; + pos &= ~3; + val = dev->config_space[pos]; // read ID + + if (val == 0xff) + break; + if (val == cap) { + *ptr_pos = ppos; + return pos; + } + + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static int cfg_range_noop(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length) { + return 0; +} +static int cfg_range_read_virtual(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length) { + struct pt_dev_state * state = (struct pt_dev_state *)dev->private_data; + int i; + // read virtualized part of config space + for (i = 0; i < length; i++) { + *(uint8_t *)((uint8_t *)ptr + i) = state->pci_dev->config_space[reg_num + i]; + } + return 0; +} + +static int cfg_range_hook_add( + uint_t reg, + uint_t length, + int (*read)(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length), + int (*write)(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length), + struct pt_dev_state *state) { + struct cfg_range_hook *range = V3_Malloc(sizeof(*range)); + if(!range) return -1; + range->start = reg; + range->len = length; + range->read = read; + range->write = write; + list_add(&range->list, &state->cfg_virtual_ranges); + PrintDebug("%s: added hook to config space range %x:%x\n", + state->name, range->start, range->start + range->len); + return 0; +} + +static inline int cfg_range_hooked(uint_t reg, uint_t length, struct cfg_range_hook *rh, struct pt_dev_state *state) { + struct cfg_range_hook *range; + list_for_each_entry(range, &state->cfg_virtual_ranges, list) { + int b = range->start + range->len, a = range->start; + if (reg >= a && reg + length <= b) { + *rh = *range; + return 1; + } + if (!(reg >= b || reg + length <= a)) { + PrintError("%s: access to %x, len %x, bad overlapping with excluded range %x, len %x\n", state->name, reg, length, a, b); + return -1; + } + } + return 0; +} static int pt_config_update(uint_t reg_num, void * src, uint_t length, void * private_data) { struct vm_device * dev = (struct vm_device *)private_data; @@ -594,15 +742,68 @@ static int pt_config_update(uint_t reg_num, void * src, uint_t length, void * pr union pci_addr_reg pci_addr = {state->phys_pci_addr.value}; pci_addr.reg = reg_num >> 2; + uint32_t addr = pci_addr.value | (reg_num & 3); + int status = 0; + + struct cfg_range_hook rh = {0}; + status = cfg_range_hooked(reg_num, length, &rh, state); + if(status == -1) return -1; + if(status) { + if(rh.write) + return rh.write(dev, reg_num, src, length); + PrintError("PCI CFG Range write hook is NULL for range %x:%x\n", rh.start, rh.start + rh.len); + return -1; + } + // no hook present, assume default behaviour if (length == 1) { - pci_cfg_write8(pci_addr.value, *(uint8_t *)src); + status = pci_cfg_write(addr, length, *(uint8_t *)src); } else if (length == 2) { - pci_cfg_write16(pci_addr.value, *(uint16_t *)src); + status = pci_cfg_write(addr, length, *(uint16_t *)src); } else if (length == 4) { - pci_cfg_write32(pci_addr.value, *(uint32_t *)src); + status = pci_cfg_write(addr, length, *(uint32_t *)src); + } else { + PrintError("%s: Config space update for %x reg, invalid length %d\n", state->name, reg_num, length); + return -1; + } + if(status) { + PrintError("%s: Config space update for %x reg, length %d, failed\n", state->name, reg_num, length); + return -1; + } + + //PrintDebug("%s: Update to %x reg, len %d, value %08x (addr %08x)\n", state->name, reg_num, length, *(uint32_t *)src, pci_addr.value); + return 0; +} + +static int pt_config_read(uint_t reg_num, void * dst, uint_t length, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct pt_dev_state * state = (struct pt_dev_state *)dev->private_data; + union pci_addr_reg pci_addr = {state->phys_pci_addr.value}; + pci_addr.reg = reg_num >> 2; + int i, status; + + struct cfg_range_hook rh = {0}; + status = cfg_range_hooked(reg_num, length, &rh, state); + if(status == -1) return -1; + if(status) { + if(rh.read) + return rh.read(dev, reg_num, dst, length); + PrintError("PCI CFG Range read hook is NULL for range %x:%x\n", rh.start, rh.start + rh.len); + return -1; + } + + // no range hook means default behaviour - read data from real config space and update virtual config space + uint32_t addr = pci_addr.value | (reg_num & 3); + if(pci_cfg_read(addr, length, dst) != 0) { + PrintError(" %s: Config space read from %x reg, length %d, failed\n", state->name, reg_num, length); + return -1; } + for (i = 0; i < length; i++) { + state->pci_dev->config_space[reg_num + i] = *(uint8_t *)((uint8_t *)dst + i); + } + + //PrintDebug("%s: Read from %x reg, len %d, value %08x\n", state->name, reg_num, length, *(uint32_t *)dst); return 0; } @@ -665,6 +866,38 @@ static int pt_exp_rom_write(struct pci_device * pci_dev, uint32_t * src, void * } +static int pci_exclude_capability(struct v3_vm_info * vm_info, struct pt_dev_state *state, char *cap_name) { + struct pci_device *dev = state->pci_dev; + + uint8_t cap = 0, pos, ppos = 0; + + if(!strcmp(cap_name, "MSI")) { + cap = PCI_CAP_ID_MSI; + } else if(!strcmp(cap_name, "MSI-X")) { + cap = PCI_CAP_ID_MSIX; + } else { + PrintError("Unknown capability name: %s\n", cap_name); + return -1; + } + + pos = pci_find_capability_and_pointer(dev, cap, &ppos); + if (!pos) { + PrintError("Warning: capability %s not found.\n", cap_name); + } else { + // modify list to exclude entry + PrintDebug("%s capability at %x excluded.\n", cap_name, pos); + dev->config_space[ppos] = dev->config_space[pos + PCI_CAP_LIST_NEXT] & ~3; + dev->config_space[pos] = 0; + if(cfg_range_hook_add(ppos & ~3, 4, cfg_range_read_virtual, cfg_range_noop, state)) return -1; + } + return 0; +} + + + +// This array is used for tracking already added PCI passthrough devices +static uint_t pt_devs_bdf[MAX_PASSTHROUGH_DEVICES]; + static int find_real_pci_dev(uint16_t vendor_id, uint16_t device_id, struct pt_dev_state * state) { union pci_addr_reg pci_addr = {0x80000000}; uint_t i, j, k, m; @@ -682,6 +915,18 @@ static int find_real_pci_dev(uint16_t vendor_id, uint16_t device_id, struct pt_d for (j = 0, pci_addr.dev = 0; j < PCI_DEV_MAX; j++, pci_addr.dev++) { for (k = 0, pci_addr.func = 0; k < PCI_FN_MAX; k++, pci_addr.func++) { + // search for the same device already passed through. + uint_t bdf = (i << 8) + (j << 5) + k; + int found = 0; + for(m = 0; m < MAX_PASSTHROUGH_DEVICES; ++m) { + if(pt_devs_bdf[m] == bdf) { + found = 1; + break; + } + if(!pt_devs_bdf[m]) break; + } + if (found) + continue; v3_outdw(PCI_CFG_ADDR, pci_addr.value); pci_hdr.value = v3_indw(PCI_CFG_DATA); @@ -694,9 +939,20 @@ static int find_real_pci_dev(uint16_t vendor_id, uint16_t device_id, struct pt_d // Copy the configuration space to the local cached version for (m = 0, pci_addr.reg = 0; m < PCI_HDR_SIZE; m += 4, pci_addr.reg++) { - cfg_space[pci_addr.reg] = pci_cfg_read32(pci_addr.value); + if(pci_cfg_read(pci_addr.value, 4, &cfg_space[pci_addr.reg])) { + PrintError("Failed to read configuration space for device %x:%x (bus=%d, dev=%d, func=%d)\n", + vendor_id, device_id, + pci_addr.bus, pci_addr.dev, pci_addr.func); + return -1; + } } + // mark this device as passed through + for(m = 0; m < MAX_PASSTHROUGH_DEVICES; ++m) + if (!pt_devs_bdf[m]) { + pt_devs_bdf[m] = bdf; + break; + } PrintDebug("Found device %x:%x (bus=%d, dev=%d, func=%d)\n", vendor_id, device_id, @@ -713,7 +969,7 @@ static int find_real_pci_dev(uint16_t vendor_id, uint16_t device_id, struct pt_d -static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev) { +static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev, v3_cfg_tree_t * cfg) { struct pt_dev_state * state = (struct pt_dev_state *)dev->private_data; struct pci_device * pci_dev = NULL; struct v3_pci_bar bars[6]; @@ -727,24 +983,42 @@ static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * de bars[i].bar_write = pci_bar_write; } - pci_dev = v3_pci_register_device(state->pci_bus, + pci_dev = v3_pci_register_passthrough_device(state->pci_bus, PCI_STD_DEVICE, bus_num, -1, 0, state->name, bars, pt_config_update, - NULL, - pt_exp_rom_write, + pt_config_read, + pt_exp_rom_write, dev); - // This will overwrite the bar registers.. but that should be ok. - memcpy(pci_dev->config_space, (uint8_t *)&(state->real_hdr), sizeof(struct pci_config_header)); + memcpy(pci_dev->config_space, (uint8_t *)&(state->real_hdr), PCI_HDR_SIZE); state->pci_dev = pci_dev; pci_exp_rom_init(dev, state); - v3_sym_map_pci_passthrough(vm_info, pci_dev->bus_num, pci_dev->dev_num, pci_dev->fn_num); + INIT_LIST_HEAD(&state->cfg_virtual_ranges); + int status = 0; + // do not passthrough accesses to the following registers. + status |= cfg_range_hook_add(0x0, 0x4, cfg_range_read_virtual, cfg_range_noop, state); // device and vendor IDs + status |= cfg_range_hook_add(0x8, 0x4, cfg_range_read_virtual, cfg_range_noop, state); // revision ID and class code + status |= cfg_range_hook_add(0x10, 0x30, cfg_range_read_virtual, cfg_range_noop, state); // BARs etc. + if(status) return -1; + + cfg = v3_cfg_subtree(cfg, "exclude_cap"); + while(cfg) { + if(!strcmp(cfg->name, "exclude_cap")) { + char *cap_name = v3_xml_txt(cfg); + V3_Print("Trying to exclude cap: %s\n", cap_name); + if(pci_exclude_capability(vm_info, state, cap_name)) return -1; + } + cfg = v3_cfg_next_branch(cfg); + } + + + if(v3_sym_map_pci_passthrough(vm_info, pci_dev->bus_num, pci_dev->dev_num, pci_dev->fn_num)) return -1; return 0; @@ -809,11 +1083,15 @@ static int passthrough_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { return 0; } - setup_virt_pci_dev(vm, dev); + setup_virt_pci_dev(vm, dev, cfg); v3_hook_irq(vm, atoi(v3_cfg_val(cfg, "irq")), irq_handler, dev); // v3_hook_irq(info, 64, irq_handler, dev); + state->pci_dev->is_passthrough = 1; + + + return 0; } diff --git a/palacios/src/devices/piix3.c b/palacios/src/devices/piix3.c index 8752943..720800f 100644 --- a/palacios/src/devices/piix3.c +++ b/palacios/src/devices/piix3.c @@ -380,7 +380,12 @@ static int raise_pci_irq(struct pci_device * pci_dev, void * dev_data) { //PrintError("Raising PCI IRQ %d, %p\n", piix3_cfg->pirq_rc[irq_index], piix3->vm); - if (piix3_cfg->pirq_rc[irq_index] < 16) { + if(pci_dev->is_passthrough) { + int guest_irq = pci_dev->config_header.intr_line; + + v3_raise_irq(piix3->vm, guest_irq); + + } else if (piix3_cfg->pirq_rc[irq_index] < 16) { v3_raise_irq(piix3->vm, piix3_cfg->pirq_rc[irq_index] & 0xf); } else { PrintError("Tried to raise interrupt on disabled PIRQ entry (%d)\n", irq_index); -- 1.7.5.4