PCI Capability passthrough update From: Alexander Kudryavtsev Replace the fixed list of virtualized config-space ranges with per-range read/write hooks (struct cfg_range_hook), add MSI-X to the capabilities that can be excluded, and fail device setup when excluding a capability or v3_sym_map_pci_passthrough() fails. --- palacios/src/devices/pci_passthrough.c | 100 +++++++++++++++++++++----------- 1 files changed, 66 insertions(+), 34 deletions(-) diff --git a/palacios/src/devices/pci_passthrough.c b/palacios/src/devices/pci_passthrough.c index 1373182..050cd02 100644 --- a/palacios/src/devices/pci_passthrough.c +++ b/palacios/src/devices/pci_passthrough.c @@ -70,6 +70,7 @@ #define PCI_CAP_LIST_NEXT 1 #define PCI_CAP_ID_MSI 0x05 +#define PCI_CAP_ID_MSIX 0x11 union pci_addr_reg { @@ -108,9 +109,11 @@ struct pt_bar { }; -struct cfg_range { +struct cfg_range_hook { uint_t start; uint_t len; + int (*read)(struct vm_device *dev, uint_t reg_num, void* ptr, uint_t length); + int (*write)(struct vm_device *dev, uint_t reg_num, void* ptr, uint_t length); struct list_head list; }; @@ -686,23 +689,45 @@ static int pci_find_capability_and_pointer(struct pci_device *dev, int cap, uint return 0; } -static int cfg_add_virtual_range(uint_t reg, uint_t length, struct pt_dev_state *state) { - struct cfg_range *range = V3_Malloc(sizeof(*range)); +static int cfg_range_noop(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length) { + return 0; +} +static int cfg_range_read_virtual(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length) { + struct pt_dev_state * state = (struct pt_dev_state *)dev->private_data; + int i; + // read virtualized part of config space + for (i = 0; i < length; i++) { + *(uint8_t *)((uint8_t *)ptr + i) = state->pci_dev->config_space[reg_num + i]; + } + return 0; +} + +static int cfg_range_hook_add( + uint_t reg, + uint_t length, + int (*read)(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length), + int (*write)(struct vm_device *dev, uint_t reg_num, void *ptr, uint_t length), + struct pt_dev_state *state) { + struct cfg_range_hook *range = V3_Malloc(sizeof(*range)); if(!range) return -1; range->start = reg; range->len = length; + range->read = read; + range->write = write; 
list_add(&range->list, &state->cfg_virtual_ranges); - PrintDebug("%s: config space range %x:%x became virtual\n", + PrintDebug("%s: added hook to config space range %x:%x\n", state->name, range->start, range->start + range->len); return 0; } -static inline int cfg_range_virtual(uint_t reg, uint_t length, struct pt_dev_state *state) { - struct cfg_range *range; +static inline int cfg_range_hooked(uint_t reg, uint_t length, struct cfg_range_hook *rh, struct pt_dev_state *state) { + struct cfg_range_hook *range; list_for_each_entry(range, &state->cfg_virtual_ranges, list) { int b = range->start + range->len, a = range->start; - if (reg >= a && reg + length <= b) + if (reg >= a && reg + length <= b) { + *rh = *range; return 1; + } if (!(reg >= b || reg + length <= a)) { PrintError("%s: access to %x, len %x, bad overlapping with excluded range %x, len %x\n", state->name, reg, length, a, b); return -1; @@ -720,12 +745,17 @@ static int pt_config_update(uint_t reg_num, void * src, uint_t length, void * pr uint32_t addr = pci_addr.value | (reg_num & 3); int status = 0; - status = cfg_range_virtual(reg_num, length, state); - if(status == -1) + struct cfg_range_hook rh = {0}; + status = cfg_range_hooked(reg_num, length, &rh, state); + if(status == -1) return -1; + if(status) { + if(rh.write) + return rh.write(dev, reg_num, src, length); + PrintError("PCI CFG Range write hook is NULL for range %x:%x\n", rh.start, rh.start + rh.len); return -1; - if(status == 1) - return 0; // do not update this field really. 
+ } + // no hook present, assume default behaviour if (length == 1) { status = pci_cfg_write(addr, length, *(uint8_t *)src); } else if (length == 2) { @@ -742,7 +772,6 @@ static int pt_config_update(uint_t reg_num, void * src, uint_t length, void * pr } //PrintDebug("%s: Update to %x reg, len %d, value %08x (addr %08x)\n", state->name, reg_num, length, *(uint32_t *)src, pci_addr.value); - return 0; } @@ -753,24 +782,25 @@ static int pt_config_read(uint_t reg_num, void * dst, uint_t length, void * priv pci_addr.reg = reg_num >> 2; int i, status; - status = cfg_range_virtual(reg_num, length, state); + struct cfg_range_hook rh = {0}; + status = cfg_range_hooked(reg_num, length, &rh, state); if(status == -1) return -1; - if(!status) { - uint32_t addr = pci_addr.value | (reg_num & 3); - if(pci_cfg_read(addr, length, dst) != 0) { - PrintError(" %s: Config space read from %x reg, length %d, failed\n", state->name, reg_num, length); - return -1; - } + if(status) { + if(rh.read) + return rh.read(dev, reg_num, dst, length); + PrintError("PCI CFG Range read hook is NULL for range %x:%x\n", rh.start, rh.start + rh.len); + return -1; + } - for (i = 0; i < length; i++) { - state->pci_dev->config_space[reg_num + i] = *(uint8_t *)((uint8_t *)dst + i); - } + // no range hook means default behaviour - read data from real config space and update virtual config space + uint32_t addr = pci_addr.value | (reg_num & 3); + if(pci_cfg_read(addr, length, dst) != 0) { + PrintError(" %s: Config space read from %x reg, length %d, failed\n", state->name, reg_num, length); + return -1; + } - } else { - // virtualized part of config space - for (i = 0; i < length; i++) { - *(uint8_t *)((uint8_t *)dst + i) = state->pci_dev->config_space[reg_num + i]; - } + for (i = 0; i < length; i++) { + state->pci_dev->config_space[reg_num + i] = *(uint8_t *)((uint8_t *)dst + i); } //PrintDebug("%s: Read from %x reg, len %d, value %08x\n", state->name, reg_num, length, *(uint32_t *)dst); @@ -843,6 +873,8 @@ 
static int pci_exclude_capability(struct v3_vm_info * vm_info, struct pt_dev_sta if(!strcmp(cap_name, "MSI")) { cap = PCI_CAP_ID_MSI; + } else if(!strcmp(cap_name, "MSI-X")) { + cap = PCI_CAP_ID_MSIX; } else { PrintError("Unknown capability name: %s\n", cap_name); return -1; @@ -856,7 +888,7 @@ static int pci_exclude_capability(struct v3_vm_info * vm_info, struct pt_dev_sta PrintDebug("%s capability at %x excluded.\n", cap_name, pos); dev->config_space[ppos] = dev->config_space[pos + PCI_CAP_LIST_NEXT] & ~3; dev->config_space[pos] = 0; - if(cfg_add_virtual_range(ppos & ~3, 4, state)) return -1; + if(cfg_range_hook_add(ppos & ~3, 4, cfg_range_read_virtual, cfg_range_noop, state)) return -1; } return 0; } @@ -970,23 +1002,23 @@ static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * de INIT_LIST_HEAD(&state->cfg_virtual_ranges); int status = 0; // do not passthrough accesses to the following registers. - status |= cfg_add_virtual_range(0x0, 0x4, state); // device and vendor IDs - status |= cfg_add_virtual_range(0x8, 0x4, state); // revision ID and class code - status |= cfg_add_virtual_range(0x10, 0x30, state); // BARs etc. + status |= cfg_range_hook_add(0x0, 0x4, cfg_range_read_virtual, cfg_range_noop, state); // device and vendor IDs + status |= cfg_range_hook_add(0x8, 0x4, cfg_range_read_virtual, cfg_range_noop, state); // revision ID and class code + status |= cfg_range_hook_add(0x10, 0x30, cfg_range_read_virtual, cfg_range_noop, state); // BARs etc. 
if(status) return -1; cfg = v3_cfg_subtree(cfg, "exclude_cap"); while(cfg) { if(!strcmp(cfg->name, "exclude_cap")) { char *cap_name = v3_xml_txt(cfg); - V3_Print("Found exclude cap: %s\n", cap_name); - pci_exclude_capability(vm_info, state, cap_name); + V3_Print("Trying to exclude cap: %s\n", cap_name); + if(pci_exclude_capability(vm_info, state, cap_name)) return -1; } cfg = v3_cfg_next_branch(cfg); } - v3_sym_map_pci_passthrough(vm_info, pci_dev->bus_num, pci_dev->dev_num, pci_dev->fn_num); + if(v3_sym_map_pci_passthrough(vm_info, pci_dev->bus_num, pci_dev->dev_num, pci_dev->fn_num)) return -1; return 0;