X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fdevices%2Fpci.c;h=ab1442248a820916b1aa286702f6afcd5b0ecbcb;hb=d22c11cec4e8c3390bfe6bf16ed07f5d073f0d4a;hp=b65aefc73bc52e8145bc6cbef809a02d77232504;hpb=8a441df14ef65bb559ce090249343ec1dac1a7fc;p=palacios.git diff --git a/palacios/src/devices/pci.c b/palacios/src/devices/pci.c index b65aefc..ab14422 100644 --- a/palacios/src/devices/pci.c +++ b/palacios/src/devices/pci.c @@ -7,15 +7,15 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * + * Copyright (c) 2009, Jack Lange * Copyright (c) 2009, Lei Xia * Copyright (c) 2009, Chang Seok Bae - * Copyright (c) 2009, Jack Lange * Copyright (c) 2009, The V3VEE Project * All rights reserved. * - * Author: Lei Xia + * Author: Jack Lange + * Lei Xia * Chang Seok Bae - * Jack Lange * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". @@ -28,11 +28,19 @@ #include #include #include +#include #include #include -#ifndef CONFIG_DEBUG_PCI +#include +#include + + +#include + + +#ifndef V3_CONFIG_DEBUG_PCI #undef PrintDebug #define PrintDebug(fmt, args...) #endif @@ -48,6 +56,10 @@ // This must always be a multiple of 8 #define MAX_BUS_DEVICES 32 +#define PCI_CAP_ID_MSI 0x05 +#define PCI_CAP_ID_MSIX 0x11 + + struct pci_addr_reg { union { uint32_t val; @@ -57,7 +69,8 @@ struct pci_addr_reg { uint_t fn_num : 3; uint_t dev_num : 5; uint_t bus_num : 8; - uint_t rsvd2 : 7; + uint_t hi_reg_num : 4; + uint_t rsvd2 : 3; uint_t enable : 1; } __attribute__((packed)); } __attribute__((packed)); @@ -77,9 +90,9 @@ struct pci_bus { uint8_t dev_map[MAX_BUS_DEVICES / 8]; - int (*raise_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev); - int (*lower_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev); - struct vm_device * irq_bridge_dev; + int (*raise_pci_irq)(struct pci_device * pci_dev, void * dev_data, struct v3_irq * vec); + int (*lower_pci_irq)(struct pci_device * pci_dev, void * dev_data, struct v3_irq * vec); + void * irq_dev_data; }; @@ -97,27 +110,79 @@ struct pci_internal { +struct cfg_range_hook { + uint32_t start; + uint32_t length; + + int (*write)(struct pci_device * pci_dev, uint32_t offset, + void * src, uint_t length, void * private_data); + + int (*read)(struct pci_device * pci_dev, uint32_t offset, + void * dst, uint_t length, void * private_data); + + void * private_data; + + struct list_head list_node; +}; + -#ifdef CONFIG_DEBUG_PCI +struct pci_cap { + uint8_t id; + uint32_t offset; + uint8_t enabled; + + struct list_head cap_node; +}; + + +// These mark read only fields in the pci config header. +// If a bit is 1, then the field is writable in the header +/* Notes: + * BIST is disabled by default (All writes to it will be dropped + * Cardbus CIS is disabled (All writes are dropped) + * Writes to capability pointer are disabled + */ +static uint8_t pci_hdr_write_mask_00[64] = { 0x00, 0x00, 0x00, 0x00, /* Device ID, Vendor ID */ + 0xbf, 0xff, 0x00, 0xf9, /* Command, status */ + 0x00, 0x00, 0x00, 0x00, /* Revision ID, Class code */ + 0x00, 0xff, 0x00, 0x00, /* CacheLine Size, Latency Timer, Header Type, BIST */ + 0xff, 0xff, 0xff, 0xff, /* BAR 0 */ + 0xff, 0xff, 0xff, 0xff, /* BAR 1 */ + 0xff, 0xff, 0xff, 0xff, /* BAR 2 */ + 0xff, 0xff, 0xff, 0xff, /* BAR 3 */ + 0xff, 0xff, 0xff, 0xff, /* BAR 4 */ + 0xff, 0xff, 0xff, 0xff, /* BAR 5 */ + 0x00, 0x00, 0x00, 0x00, /* CardBus CIS Ptr */ + 0xff, 0xff, 0xff, 0xff, /* SubSystem Vendor ID, SubSystem ID */ + 0xff, 0xff, 0xff, 0xff, /* ExpRom BAR */ + 0x00, 0x00, 0x00, 0x00, /* CAP ptr (0xfc to enable), RSVD */ + 0x00, 0x00, 0x00, 0x00, /* Reserved */ + 0xff, 0x00, 0x00, 0x00 /* INTR Line, INTR Pin, MIN_GNT, MAX_LAT */ +}; + + + + +#ifdef V3_CONFIG_DEBUG_PCI static void pci_dump_state(struct pci_internal * pci_state) { struct rb_node * node = v3_rb_first(&(pci_state->bus_list[0].devices)); struct pci_device * tmp_dev = NULL; - PrintDebug("===PCI: Dumping state Begin ==========\n"); + PrintDebug(VM_NONE, VCORE_NONE, "===PCI: Dumping state Begin ==========\n"); do { tmp_dev = rb_entry(node, struct pci_device, dev_tree_node); - PrintDebug("PCI Device Number: %d (%s):\n", tmp_dev->dev_num, tmp_dev->name); - PrintDebug("irq = %d\n", tmp_dev->config_header.intr_line); - PrintDebug("Vend ID: 0x%x\n", tmp_dev->config_header.vendor_id); - PrintDebug("Device ID: 0x%x\n", tmp_dev->config_header.device_id); + PrintDebug(VM_NONE, VCORE_NONE, "PCI Device Number: %d (%s):\n", tmp_dev->dev_num, tmp_dev->name); + PrintDebug(VM_NONE, VCORE_NONE, "irq = %d\n", tmp_dev->config_header.intr_line); + PrintDebug(VM_NONE, VCORE_NONE, "Vend ID: 0x%x\n", tmp_dev->config_header.vendor_id); + PrintDebug(VM_NONE, VCORE_NONE, "Device ID: 0x%x\n", tmp_dev->config_header.device_id); } while ((node = v3_rb_next(node))); - PrintDebug("====PCI: Dumping state End==========\n"); + PrintDebug(VM_NONE, VCORE_NONE, "====PCI: Dumping state End==========\n"); } #endif @@ -130,11 +195,11 @@ static int get_free_dev_num(struct pci_bus * bus) { int i, j; for (i = 0; i < sizeof(bus->dev_map); i++) { - PrintDebug("i=%d\n", i); + PrintDebug(VM_NONE, VCORE_NONE, "i=%d\n", i); if (bus->dev_map[i] != 0xff) { // availability for (j = 0; j < 8; j++) { - PrintDebug("\tj=%d\n", j); + PrintDebug(VM_NONE, VCORE_NONE, "\tj=%d\n", j); if (!(bus->dev_map[i] & (0x1 << j))) { return ((i * 8) + j); } @@ -220,200 +285,605 @@ static struct pci_device * get_device(struct pci_bus * bus, uint8_t dev_num, uin +// There won't be many hooks at all, so unordered lists are acceptible for now +static struct cfg_range_hook * find_cfg_range_hook(struct pci_device * pci, uint32_t start, uint32_t length) { + uint32_t end = start + length - 1; // end is inclusive + struct cfg_range_hook * hook = NULL; + list_for_each_entry(hook, &(pci->cfg_hooks), list_node) { + uint32_t hook_end = hook->start + hook->length - 1; + if (!((hook->start > end) || (hook_end < start))) { + return hook; + } + } + + return NULL; +} -static int addr_port_read(ushort_t port, void * dst, uint_t length, struct vm_device * dev) { - struct pci_internal * pci_state = (struct pci_internal *)dev->private_data; - int reg_offset = port & 0x3; - uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset; +int v3_pci_hook_config_range(struct pci_device * pci, + uint32_t start, uint32_t length, + int (*write)(struct pci_device * pci_dev, uint32_t offset, + void * src, uint_t length, void * private_data), + int (*read)(struct pci_device * pci_dev, uint32_t offset, + void * dst, uint_t length, void * private_data), + void * private_data) { + struct cfg_range_hook * hook = NULL; + - PrintDebug("Reading PCI Address Port (%x): %x len=%d\n", port, pci_state->addr_reg.val, length); + if (find_cfg_range_hook(pci, start, length)) { + PrintError(VM_NONE, VCORE_NONE, "Tried to hook an already hooked config region\n"); + return -1; + } + + hook = V3_Malloc(sizeof(struct cfg_range_hook)); - if (length == 4) { - if (reg_offset != 0) { - PrintError("Invalid Address Port Read\n"); - return -1; + if (!hook) { + PrintError(VM_NONE, VCORE_NONE, "Could not allocate range hook\n"); + return -1; + } + + memset(hook, 0, sizeof(struct cfg_range_hook)); + + hook->start = start; + hook->length = length; + hook->private_data = private_data; + hook->write = write; + hook->read = read; + + list_add(&(hook->list_node), &(pci->cfg_hooks)); + + return 0; + +} + + + + +// Note byte ordering: LSB -> MSB +static uint8_t msi_32_rw_bitmask[10] = { 0x00, 0x00, /* ID, next ptr */ + 0x71, 0x00, /* MSG CTRL */ + 0xfc, 0xff, 0xff, 0xff, /* MSG ADDR */ + 0xff, 0xff}; /* MSG DATA */ + +static uint8_t msi_64_rw_bitmask[14] = { 0x00, 0x00, /* ID, next ptr */ + 0x71, 0x00, /* MSG CTRL */ + 0xfc, 0xff, 0xff, 0xff, /* MSG LO ADDR */ + 0xff, 0xff, 0xff, 0xff, /* MSG HI ADDR */ + 0xff, 0xff}; /* MSG DATA */ + +static uint8_t msi_64pervect_rw_bitmask[24] = { 0x00, 0x00, /* ID, next ptr */ + 0x71, 0x00, /* MSG CTRL */ + 0xfc, 0xff, 0xff, 0xff, /* MSG LO CTRL */ + 0xff, 0xff, 0xff, 0xff, /* MSG HI ADDR */ + 0xff, 0xff, /* MSG DATA */ + 0x00, 0x00, /* RSVD */ + 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00}; + +static uint8_t msix_rw_bitmask[12] = { 0x00, 0x00, /* ID, next ptr */ + 0x00, 0x80, + 0xff, 0xff, 0xff, 0xff, + 0x08, 0xff, 0xff, 0xff}; + + +/* I am completely guessing what the format is here. + I only have version 1 of the PCIe spec and cannot download version 2 or 3 + without paying the PCI-SIG $3000 a year for membership. + So this is just cobbled together from the version 1 spec and KVM. +*/ + + +static uint8_t pciev1_rw_bitmask[20] = { 0x00, 0x00, /* ID, next ptr */ + 0x00, 0x00, /* PCIE CAP register */ + 0x00, 0x00, 0x00, 0x00, /* DEV CAP */ + 0xff, 0xff, /* DEV CTRL */ + 0x0f, 0x00, /* DEV STATUS */ + 0x00, 0x00, 0x00, 0x00, /* LINK CAP */ + 0xfb, 0x01, /* LINK CTRL */ + 0x00, 0x00 /* LINK STATUS */ +}; + + +static uint8_t pciev2_rw_bitmask[60] = { 0x00, 0x00, /* ID, next ptr */ + 0x00, 0x00, /* PCIE CAP register */ + 0x00, 0x00, 0x00, 0x00, /* DEV CAP */ + 0xff, 0xff, /* DEV CTRL */ + 0x0f, 0x00, /* DEV STATUS */ + 0x00, 0x00, 0x00, 0x00, /* LINK CAP */ + 0xfb, 0x01, /* LINK CTRL */ + 0x00, 0x00, /* LINK STATUS */ + 0x00, 0x00, 0x00, 0x00, /* SLOT CAP ?? */ + 0x00, 0x00, /* SLOT CTRL ?? */ + 0x00, 0x00, /* SLOT STATUS */ + 0x00, 0x00, /* ROOT CTRL */ + 0x00, 0x00, /* ROOT CAP */ + 0x00, 0x00, 0x00, 0x00, /* ROOT STATUS */ + 0x00, 0x00, 0x00, 0x00, /* WHO THE FUCK KNOWS */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static uint8_t pm_rw_bitmask[] = { 0x00, 0x00, /* ID, next ptr */ + 0x00, 0x00, /* PWR MGMT CAPS */ + 0x03, 0x9f, /* PWR MGMT CTRL */ + 0x00, 0x00 /* PMCSR_BSE, Data */ +}; + + + +int cap_write(struct pci_device * pci, uint32_t offset, void * src, uint_t length, void * private_data) { + struct pci_cap * cap = private_data; + uint32_t cap_offset = cap->offset; + pci_cap_type_t cap_type = cap->id; + + uint32_t write_offset = offset - cap_offset; + void * cap_ptr = &(pci->config_space[cap_offset + 2]); + int i = 0; + + int msi_was_enabled = 0; + int msix_was_enabled = 0; + + + V3_Print(VM_NONE, VCORE_NONE, "CAP write trapped (val=%x, cfg_offset=%d, write_offset=%d)\n", *(uint32_t *)src, offset, write_offset); + + if (cap_type == PCI_CAP_MSI) { + struct msi_msg_ctrl * msg_ctrl = cap_ptr; + + if (msg_ctrl->msi_enable == 1) { + msi_was_enabled = 1; + } + } else if (cap_type == PCI_CAP_MSIX) { + struct msix_cap * msix_cap = cap_ptr; + + if (msix_cap->msg_ctrl.msix_enable == 1) { + msix_was_enabled = 1; + } + } + + for (i = 0; i < length; i++) { + uint8_t mask = 0; + + if (cap_type == PCI_CAP_MSI) { + struct msi_msg_ctrl * msg_ctrl = cap_ptr; + + V3_Print(VM_NONE, VCORE_NONE, "MSI Cap Ctrl=%x\n", *(uint16_t *)pci->msi_cap); + V3_Print(VM_NONE, VCORE_NONE, "MSI ADDR=%x\n", *(uint32_t *)(cap_ptr + 2)); + V3_Print(VM_NONE, VCORE_NONE, "MSI HI ADDR=%x\n", *(uint32_t *)(cap_ptr + 6)); + V3_Print(VM_NONE, VCORE_NONE, "MSI Data=%x\n", *(uint16_t *)(cap_ptr + 10)); + + if (msg_ctrl->cap_64bit) { + if (msg_ctrl->per_vect_mask) { + mask = msi_64pervect_rw_bitmask[write_offset]; + } else { + mask = msi_64_rw_bitmask[write_offset]; + } + } else { + mask = msi_32_rw_bitmask[write_offset]; + } + } else if (cap_type == PCI_CAP_MSIX) { + mask = msix_rw_bitmask[write_offset]; + } else if (cap_type == PCI_CAP_PCIE) { + struct pcie_cap_reg * pcie_cap = cap_ptr; + + if (pcie_cap->version == 1) { + mask = pciev1_rw_bitmask[write_offset]; + } else if (pcie_cap->version == 2) { + mask = pciev2_rw_bitmask[write_offset]; + } else { + return 0; + } + } else if (cap_type == PCI_CAP_PM) { + mask = pm_rw_bitmask[write_offset]; + } + + pci->config_space[offset + i] &= ~mask; + pci->config_space[offset + i] |= ((*(uint8_t *)(src + i)) & mask); + + write_offset++; + } + + + if (pci->cmd_update) { + + /* Detect changes to interrupt types for cmd updates */ + if (cap_type == PCI_CAP_MSI) { + struct msi_msg_ctrl * msg_ctrl = cap_ptr; + + V3_Print(VM_NONE, VCORE_NONE, "msi_was_enabled=%d, msi_is_enabled=%d\n", msi_was_enabled, msg_ctrl->msi_enable); + + if ((msg_ctrl->msi_enable == 1) && (msi_was_enabled == 0)) { + pci->irq_type = IRQ_MSI; + pci->cmd_update(pci, PCI_CMD_MSI_ENABLE, msg_ctrl->mult_msg_enable, pci->priv_data); + } else if ((msg_ctrl->msi_enable == 0) && (msi_was_enabled == 1)) { + pci->irq_type = IRQ_NONE; + pci->cmd_update(pci, PCI_CMD_MSI_DISABLE, 0, pci->priv_data); + } + } else if (cap_type == PCI_CAP_MSIX) { + struct msix_cap * msix_cap = cap_ptr; + + if ((msix_cap->msg_ctrl.msix_enable == 1) && (msix_was_enabled == 0)) { + pci->irq_type = IRQ_MSIX; + pci->cmd_update(pci, PCI_CMD_MSIX_ENABLE, msix_cap->msg_ctrl.table_size, pci->priv_data); + } else if ((msix_cap->msg_ctrl.msix_enable == 0) && (msix_was_enabled == 1)) { + pci->irq_type = IRQ_NONE; + pci->cmd_update(pci, PCI_CMD_MSIX_DISABLE, msix_cap->msg_ctrl.table_size, pci->priv_data); + } + } + } + + return 0; +} + + +static int init_pci_cap(struct pci_device * pci, pci_cap_type_t cap_type, uint_t cap_offset) { + void * cap_ptr = &(pci->config_space[cap_offset + 2]); + + if (cap_type == PCI_CAP_MSI) { + struct msi32_msg_addr * msi = cap_ptr; + + // We only expose a basic 32 bit MSI interface + msi->msg_ctrl.msi_enable = 0; + msi->msg_ctrl.mult_msg_enable = 0; + msi->msg_ctrl.cap_64bit = 0; + msi->msg_ctrl.per_vect_mask = 0; + + msi->addr.val = 0; + msi->data.val = 0; + + } else if (cap_type == PCI_CAP_MSIX) { + + + + } else if (cap_type == PCI_CAP_PCIE) { + struct pcie_cap_v2 * pcie = cap_ptr; + + // The v1 and v2 formats are identical for the first X bytes + // So we use the v2 struct, and only modify extended fields if v2 is detected + + pcie->dev_cap.fn_level_reset = 0; + + pcie->dev_ctrl.val &= 0x70e0; // only preserve max_payload_size and max_read_req_size untouched + pcie->dev_ctrl.relaxed_order_enable = 1; + pcie->dev_ctrl.no_snoop_enable = 1; + + pcie->dev_status.val = 0; + + pcie->link_cap.val &= 0x0003ffff; + + pcie->link_status.val &= 0x03ff; + + if (pcie->pcie_cap.version >= 2) { + pcie->slot_cap = 0; + pcie->slot_ctrl = 0; + pcie->slot_status = 0; + + pcie->root_ctrl = 0; + pcie->root_cap = 0; + pcie->root_status = 0; } - *(uint32_t *)dst = *(uint32_t *)reg_addr; - } else if (length == 2) { - if (reg_offset > 2) { - PrintError("Invalid Address Port Read\n"); + } else if (cap_type == PCI_CAP_PM) { + + } + + + return 0; +} + + +// enumerate all capabilities and disable them. +static int scan_pci_caps(struct pci_device * pci) { + uint_t cap_offset = pci->config_header.cap_ptr; + + V3_Print(VM_NONE, VCORE_NONE, "Scanning for Capabilities (cap_offset=%d)\n", cap_offset); + + while (cap_offset != 0) { + uint8_t id = pci->config_space[cap_offset]; + uint8_t next = pci->config_space[cap_offset + 1]; + + V3_Print(VM_NONE, VCORE_NONE, "Found Capability 0x%x at offset %d (0x%x)\n", + id, cap_offset, cap_offset); + + struct pci_cap * cap = V3_Malloc(sizeof(struct pci_cap)); + + if (!cap) { + PrintError(VM_NONE, VCORE_NONE, "Error allocating PCI CAP info\n"); return -1; } - *(uint16_t *)dst = *(uint16_t *)reg_addr; - } else if (length == 1) { - *(uint8_t *)dst = *(uint8_t *)reg_addr; - } else { - PrintError("Invalid read length (%d) for PCI address register\n", length); + memset(cap, 0, sizeof(struct pci_cap)); + + cap->id = id; + cap->offset = cap_offset; + + list_add(&(cap->cap_node), &(pci->capabilities)); + + // set correct init values + init_pci_cap(pci, id, cap_offset); + + + // set to the next pointer + cap_offset = next; + } + + // Disable Capabilities + pci->config_header.cap_ptr = 0; + + // Hook Cap pointer to return cached config space value + if (v3_pci_hook_config_range(pci, 0x34, 1, + NULL, NULL, NULL) == -1) { + PrintError(VM_NONE, VCORE_NONE, "Could not hook cap pointer\n"); return -1; } - return length; + +/* + // Disable all PCIE extended capabilities for now + pci->config_space[0x100] = 0; + pci->config_space[0x101] = 0; + pci->config_space[0x102] = 0; + pci->config_space[0x103] = 0; +*/ + + + return 0; + } +int v3_pci_enable_capability(struct pci_device * pci, pci_cap_type_t cap_type) { + uint32_t size = 0; + struct pci_cap * tmp_cap = NULL; + struct pci_cap * cap = NULL; + void * cap_ptr = NULL; -static int addr_port_write(ushort_t port, void * src, uint_t length, struct vm_device * dev) { - struct pci_internal * pci_state = (struct pci_internal *)dev->private_data; - int reg_offset = port & 0x3; - uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset; + list_for_each_entry(tmp_cap, &(pci->capabilities), cap_node) { + if (tmp_cap->id == cap_type) { + cap = tmp_cap; + break; + } + } - if (length == 4) { - if (reg_offset != 0) { - PrintError("Invalid Address Port Write\n"); - return -1; + if ((cap == NULL) || (cap->enabled)) { + return -1; + } + + + V3_Print(VM_NONE, VCORE_NONE, "Found Capability %x at %x (%d)\n", cap_type, cap->offset, cap->offset); + + // found the capability + + // mark it as enabled + cap->enabled = 1; + + cap_ptr = &(pci->config_space[cap->offset + 2]); + + if (cap_type == PCI_CAP_MSI) { + pci->msi_cap = cap_ptr; + + if (pci->msi_cap->cap_64bit) { + if (pci->msi_cap->per_vect_mask) { + // 64 bit MSI w/ per vector masking + size = 22; + } else { + // 64 bit MSI + size = 12; + } + } else { + // 32 bit MSI + size = 8; } + } else if (cap_type == PCI_CAP_MSIX) { + pci->msix_cap = cap_ptr; + + // disable passthrough for MSIX BAR + + pci->bar[pci->msix_cap->bir].type = PCI_BAR_MEM32; - PrintDebug("Writing PCI 4 bytes Val=%x\n", *(uint32_t *)src); + size = 10; + } else if (cap_type == PCI_CAP_PCIE) { + struct pcie_cap_reg * pcie_cap = (struct pcie_cap_reg *)&(pci->config_space[cap->offset + 2]); - *(uint32_t *)reg_addr = *(uint32_t *)src; - } else if (length == 2) { - if (reg_offset > 2) { - PrintError("Invalid Address Port Write\n"); + if (pcie_cap->version == 1) { + size = 20; + } else if (pcie_cap->version == 2) { + size = 60; + } else { return -1; } + } else if (cap_type == PCI_CAP_PM) { + size = 8; + } - PrintDebug("Writing PCI 2 byte Val=%x\n", *(uint16_t *)src); - *(uint16_t *)reg_addr = *(uint16_t *)src; - } else if (length == 1) { - PrintDebug("Writing PCI 1 byte Val=%x\n", *(uint8_t *)src); - *(uint8_t *)reg_addr = *(uint8_t *)src; - } else { - PrintError("Invalid write length (%d) for PCI address register\n", length); + V3_Print(VM_NONE, VCORE_NONE, "Hooking capability range (offset=%d, size=%d)\n", cap->offset, size); + + if (v3_pci_hook_config_range(pci, cap->offset, size + 2, + cap_write, NULL, cap) == -1) { + PrintError(VM_NONE, VCORE_NONE, "Could not hook config range (start=%d, size=%d)\n", + cap->offset + 2, size); return -1; } - PrintDebug("Writing PCI Address Port(%x): %x\n", port, pci_state->addr_reg.val); + + + // link it to the active capabilities list + pci->config_space[cap->offset + 1] = pci->config_header.cap_ptr; + pci->config_header.cap_ptr = cap->offset; // add to the head of the list + + return 0; +} + + + + +static int addr_port_read(struct guest_info * core, ushort_t port, void * dst, uint_t length, void * priv_data) { + struct pci_internal * pci_state = priv_data; + int reg_offset = port & 0x3; + uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset; + + PrintDebug(core->vm_info, core, "Reading PCI Address Port (%x): %x len=%d\n", port, pci_state->addr_reg.val, length); + + if (reg_offset + length > 4) { + PrintError(core->vm_info, core, "Invalid Address port write\n"); + return -1; + } + + memcpy(dst, reg_addr, length); return length; } -static int data_port_read(ushort_t port, void * dst, uint_t length, struct vm_device * vmdev) { - struct pci_internal * pci_state = (struct pci_internal *)(vmdev->private_data); +static int addr_port_write(struct guest_info * core, ushort_t port, void * src, uint_t length, void * priv_data) { + struct pci_internal * pci_state = priv_data; + int reg_offset = port & 0x3; + uint8_t * reg_addr = ((uint8_t *)&(pci_state->addr_reg.val)) + reg_offset; + + if (reg_offset + length > 4) { + PrintError(core->vm_info, core, "Invalid Address port write\n"); + return -1; + } + + // Set address register + memcpy(reg_addr, src, length); + + PrintDebug(core->vm_info, core, "Writing PCI Address Port(%x): AddrReg=%x (op_val = %x, len=%d) \n", port, pci_state->addr_reg.val, *(uint32_t *)src, length); + + return length; +} + + +static int data_port_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * priv_data) { + struct pci_internal * pci_state = priv_data; struct pci_device * pci_dev = NULL; - uint_t reg_num = (pci_state->addr_reg.reg_num << 2) + (port & 0x3); - int i; + uint_t reg_num = (pci_state->addr_reg.hi_reg_num << 16) +(pci_state->addr_reg.reg_num << 2) + (port & 0x3); + int i = 0; + int bytes_left = length; if (pci_state->addr_reg.bus_num != 0) { - int i = 0; - for (i = 0; i < length; i++) { - *((uint8_t *)dst + i) = 0xff; - } - + memset(dst, 0xff, length); return length; } - PrintDebug("Reading PCI Data register. bus = %d, dev = %d, reg = %d (%x), cfg_reg = %x\n", + + pci_dev = get_device(&(pci_state->bus_list[0]), pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num); + + + if (pci_dev == NULL) { + memset(dst, 0xff, length); + return length; + } + + PrintDebug(core->vm_info, core, "Reading PCI Data register. bus = %d, dev = %d, fn = %d, reg = %d (%x), cfg_reg = %x\n", pci_state->addr_reg.bus_num, pci_state->addr_reg.dev_num, + pci_state->addr_reg.fn_num, reg_num, reg_num, pci_state->addr_reg.val); - pci_dev = get_device(&(pci_state->bus_list[0]), pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num); - - if (pci_dev == NULL) { - for (i = 0; i < length; i++) { - *(uint8_t *)((uint8_t *)dst + i) = 0xff; - } - return length; - } + while (bytes_left > 0) { + struct cfg_range_hook * cfg_hook = find_cfg_range_hook(pci_dev, reg_num + i, 1); + void * cfg_dst = &(pci_dev->config_space[reg_num + i]); - for (i = 0; i < length; i++) { - *(uint8_t *)((uint8_t *)dst + i) = pci_dev->config_space[reg_num + i]; - } + if (cfg_hook) { + uint_t range_len = cfg_hook->length - ((reg_num + i) - cfg_hook->start); + range_len = (range_len > bytes_left) ? bytes_left : range_len; - PrintDebug("\tVal=%x, len=%d\n", *(uint32_t *)dst, length); + if (cfg_hook->read) { + cfg_hook->read(pci_dev, reg_num + i, cfg_dst, range_len, cfg_hook->private_data); + } else { + if (pci_dev->config_read) { + if (pci_dev->config_read(pci_dev, reg_num + i, cfg_dst, range_len, pci_dev->priv_data) != 0) { + PrintError(core->vm_info, core, "Error in config_read from PCI device (%s)\n", pci_dev->name); + } + } + } + + bytes_left -= range_len; + i += range_len; + } else { + if (pci_dev->config_read) { + if (pci_dev->config_read(pci_dev, reg_num + i, cfg_dst, 1, pci_dev->priv_data) != 0) { + PrintError(core->vm_info, core, "Error in config_read from PCI device (%s)\n", pci_dev->name); + } + } + + bytes_left--; + i++; + } + } + + memcpy(dst, &(pci_dev->config_space[reg_num]), length); + + PrintDebug(core->vm_info, core, "\tVal=%x, len=%d\n", *(uint32_t *)dst, length); return length; } -static inline int is_cfg_reg_writable(uchar_t header_type, int reg_num) { - if (header_type == 0x00) { - switch (reg_num) { - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0e: - case 0x3d: - return 0; - - default: - return 1; - - } - } else if (header_type == 0x80) { - switch (reg_num) { - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0e: - case 0x3d: - return 0; - - default: - return 1; - - } - } else { - // PCI to PCI Bridge = 0x01 - // CardBus Bridge = 0x02 - // huh? - PrintError("Invalid PCI Header type (0x%.2x)\n", header_type); +static int bar_update(struct pci_device * pci_dev, uint32_t offset, + void * src, uint_t length, void * private_data) { + struct v3_pci_bar * bar = (struct v3_pci_bar *)private_data; + int bar_offset = offset & ~0x03; + int bar_num = (bar_offset - 0x10) / 4; + uint32_t new_val = *(uint32_t *)src; + + PrintDebug(VM_NONE, VCORE_NONE, "Updating BAR Register (Dev=%s) (bar=%d) (old_val=0x%x) (new_val=0x%x) (length=%d)\n", + pci_dev->name, bar_num, bar->val, new_val, length); - return -1; + // Cache the changes locally + memcpy(&(pci_dev->config_space[offset]), src, length); + + if (bar->type == PCI_BAR_PASSTHROUGH) { + if (bar->bar_write(bar_num, (void *)(pci_dev->config_space + bar_offset), bar->private_data) == -1) { + PrintError(VM_NONE, VCORE_NONE, "Error in Passthrough bar write operation\n"); + return -1; + } + + return 0; } -} + + // Else we are a virtualized BAR + *(uint32_t *)(pci_dev->config_space + offset) &= bar->mask; + + // Handle buggy code that discards the freaking I/O bit... + if (bar->type == PCI_BAR_IO && !(new_val & 0x1) ) { + PrintError(VM_NONE,VCORE_NONE,"Buggy guest: Updating BAR %d of device %s discards the I/O bit...\n", bar_num, pci_dev->name); + *(uint32_t *)(pci_dev->config_space + offset) |= 0x1; + new_val |= 0x1; + } -static int bar_update(struct pci_device * pci, int bar_num, uint32_t new_val) { - struct v3_pci_bar * bar = &(pci->bar[bar_num]); - PrintDebug("Updating BAR Register (Dev=%s) (bar=%d) (old_val=0x%x) (new_val=0x%x)\n", - pci->name, bar_num, bar->val, new_val); + // V3_Print(VM_NONE, VCORE_NONE,"mask=%x written val=%x\n", bar->mask, *(uint32_t *)(pci_dev->config_space + offset)); switch (bar->type) { case PCI_BAR_IO: { int i = 0; - PrintDebug("\tRehooking %d IO ports from base 0x%x to 0x%x for %d ports\n", + PrintDebug(VM_NONE, VCORE_NONE, "\tRehooking %d IO ports from base 0x%x to 0x%x for %d ports\n", bar->num_ports, PCI_IO_BASE(bar->val), PCI_IO_BASE(new_val), bar->num_ports); - - // only do this if pci device is enabled.... - if (!(pci->config_header.status & 0x1)) { - PrintError("PCI Device IO space not enabled\n"); - } for (i = 0; i < bar->num_ports; i++) { - PrintDebug("Rehooking PCI IO port (old port=%u) (new port=%u)\n", + PrintDebug(VM_NONE, VCORE_NONE, "Rehooking PCI IO port (old port=%u) (new port=%u)\n", PCI_IO_BASE(bar->val) + i, PCI_IO_BASE(new_val) + i); - v3_unhook_io_port(pci->vm_dev->vm, PCI_IO_BASE(bar->val) + i); + v3_unhook_io_port(pci_dev->vm, PCI_IO_BASE(bar->val) + i); - if (v3_hook_io_port(pci->vm_dev->vm, PCI_IO_BASE(new_val) + i, + if (v3_hook_io_port(pci_dev->vm, PCI_IO_BASE(new_val) + i, bar->io_read, bar->io_write, bar->private_data) == -1) { - PrintError("Could not hook PCI IO port (old port=%u) (new port=%u)\n", + PrintError(VM_NONE, VCORE_NONE, "Could not hook PCI IO port (old port=%u) (new port=%u)\n", PCI_IO_BASE(bar->val) + i, PCI_IO_BASE(new_val) + i); + //v3_print_io_map(pci_dev->vm); return -1; } } @@ -423,14 +893,14 @@ static int bar_update(struct pci_device * pci, int bar_num, uint32_t new_val) { break; } case PCI_BAR_MEM32: { - v3_unhook_mem(pci->vm_dev->vm, (addr_t)(bar->val)); + v3_unhook_mem(pci_dev->vm, V3_MEM_CORE_ANY, (addr_t)(bar->val)); if (bar->mem_read) { - v3_hook_full_mem(pci->vm_dev->vm, PCI_MEM32_BASE(new_val), + v3_hook_full_mem(pci_dev->vm, V3_MEM_CORE_ANY, PCI_MEM32_BASE(new_val), PCI_MEM32_BASE(new_val) + (bar->num_pages * PAGE_SIZE_4KB), - bar->mem_read, bar->mem_write, pci->vm_dev); + bar->mem_read, bar->mem_write, pci_dev->priv_data); } else { - PrintError("Write hooks not supported for PCI\n"); + PrintError(VM_NONE, VCORE_NONE, "Write hooks not supported for PCI\n"); return -1; } @@ -439,12 +909,12 @@ static int bar_update(struct pci_device * pci, int bar_num, uint32_t new_val) { break; } case PCI_BAR_NONE: { - PrintDebug("Reprogramming an unsupported BAR register (Dev=%s) (bar=%d) (val=%x)\n", - pci->name, bar_num, new_val); + PrintDebug(VM_NONE, VCORE_NONE, "Reprogramming an unsupported BAR register (Dev=%s) (bar=%d) (val=%x)\n", + pci_dev->name, bar_num, new_val); break; } default: - PrintError("Invalid Bar Reg updated (bar=%d)\n", bar_num); + PrintError(VM_NONE, VCORE_NONE, "Invalid Bar Reg updated (bar=%d)\n", bar_num); return -1; } @@ -452,18 +922,18 @@ static int bar_update(struct pci_device * pci, int bar_num, uint32_t new_val) { } -static int data_port_write(ushort_t port, void * src, uint_t length, struct vm_device * vmdev) { - struct pci_internal * pci_state = (struct pci_internal *)vmdev->private_data; +static int data_port_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { + struct pci_internal * pci_state = priv_data; struct pci_device * pci_dev = NULL; - uint_t reg_num = (pci_state->addr_reg.reg_num << 2) + (port & 0x3); - int i; - + uint_t reg_num = (pci_state->addr_reg.hi_reg_num << 16) +(pci_state->addr_reg.reg_num << 2) + (port & 0x3); + int i = 0; + int ret = length; if (pci_state->addr_reg.bus_num != 0) { return length; } - PrintDebug("Writing PCI Data register. bus = %d, dev = %d, fn = %d, reg = %d (%x) addr_reg = %x (val=%x, len=%d)\n", + PrintDebug(VM_NONE, VCORE_NONE, "Writing PCI Data register. bus = %d, dev = %d, fn = %d, reg = %d (0x%x) addr_reg = 0x%x (val=0x%x, len=%d)\n", pci_state->addr_reg.bus_num, pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num, @@ -475,161 +945,363 @@ static int data_port_write(ushort_t port, void * src, uint_t length, struct vm_d pci_dev = get_device(&(pci_state->bus_list[0]), pci_state->addr_reg.dev_num, pci_state->addr_reg.fn_num); if (pci_dev == NULL) { - PrintError("Writing configuration space for non-present device (dev_num=%d)\n", + PrintError(VM_NONE, VCORE_NONE, "Writing configuration space for non-present device (dev_num=%d)\n", pci_state->addr_reg.dev_num); return -1; } - - for (i = 0; i < length; i++) { - uint_t cur_reg = reg_num + i; - int writable = is_cfg_reg_writable(pci_dev->config_header.header_type, cur_reg); - - if (writable == -1) { - PrintError("Invalid PCI configuration space\n"); - return -1; - } + /* update the config space + If a hook has been registered for a given region, call the hook with the max write length + */ + while (length > 0) { + struct cfg_range_hook * cfg_hook = find_cfg_range_hook(pci_dev, reg_num + i, 1); - if (writable) { - pci_dev->config_space[cur_reg] = *(uint8_t *)((uint8_t *)src + i); + if (cfg_hook) { + uint_t range_len = cfg_hook->length - ((reg_num + i) - cfg_hook->start); + range_len = (range_len > length) ? length : range_len; + + if (cfg_hook->write) { + cfg_hook->write(pci_dev, reg_num + i, (void *)(src + i), range_len, cfg_hook->private_data); + } - if ((cur_reg >= 0x10) && (cur_reg < 0x28)) { - // BAR Register Update - int bar_reg = ((cur_reg & ~0x3) - 0x10) / 4; - - pci_dev->bar_update_flag = 1; - pci_dev->bar[bar_reg].updated = 1; - - // PrintDebug("Updating BAR register %d\n", bar_reg); + length -= range_len; + i += range_len; + } else { + // send the writes to the cached config space, and to the generic callback if present + uint8_t mask = 0xff; - } else if ((cur_reg >= 0x30) && (cur_reg < 0x34)) { - // Extension ROM update + if (reg_num < 64) { + mask = pci_hdr_write_mask_00[reg_num + i]; + } + + if (mask != 0) { + uint8_t new_val = *(uint8_t *)(src + i); + uint8_t old_val = pci_dev->config_space[reg_num + i]; - pci_dev->ext_rom_update_flag = 1; - } else if (cur_reg == 0x04) { - // COMMAND update - uint8_t command = *((uint8_t *)src + i); + pci_dev->config_space[reg_num + i] = ((new_val & mask) | (old_val & ~mask)); - PrintError("command update for %s old=%x new=%x\n", - pci_dev->name, - pci_dev->config_space[cur_reg],command); - - pci_dev->config_space[cur_reg] = command; - - if (pci_dev->cmd_update) { - pci_dev->cmd_update(pci_dev, (command & 0x01), (command & 0x02)); + if (pci_dev->config_write) { + pci_dev->config_write(pci_dev, reg_num + i, &(pci_dev->config_space[reg_num + i]), 1, pci_dev->priv_data); } - - } else if (cur_reg == 0x0f) { - // BIST update - pci_dev->config_header.BIST = 0x00; - } - } else { - PrintError("PCI Write to read only register %d\n", cur_reg); + } + + length--; + i++; } } - if (pci_dev->config_update) { - pci_dev->config_update(pci_dev, reg_num, length); - } + return ret; +} - // Scan for BAR updated - if (pci_dev->bar_update_flag) { - for (i = 0; i < 6; i++) { - if (pci_dev->bar[i].updated) { - int bar_offset = 0x10 + 4 * i; - *(uint32_t *)(pci_dev->config_space + bar_offset) &= pci_dev->bar[i].mask; - // check special flags.... - // bar_update - if (bar_update(pci_dev, i, *(uint32_t *)(pci_dev->config_space + bar_offset)) == -1) { - PrintError("PCI Device %s: Bar update Error Bar=%d\n", pci_dev->name, i); - return -1; - } +static int exp_rom_write(struct pci_device * pci_dev, uint32_t offset, + void * src, uint_t length, void * private_data) { + int bar_offset = offset & ~0x03; - pci_dev->bar[i].updated = 0; - } - } - pci_dev->bar_update_flag = 0; + if (pci_dev->config_write) { + pci_dev->config_write(pci_dev, offset, src, length, pci_dev->priv_data); } - if ((pci_dev->ext_rom_update_flag) && (pci_dev->ext_rom_update)) { - pci_dev->ext_rom_update(pci_dev); - pci_dev->ext_rom_update_flag = 0; + if (pci_dev->exp_rom_update) { + pci_dev->exp_rom_update(pci_dev, (void *)(pci_dev->config_space + bar_offset), pci_dev->priv_data); + + return 0; } + PrintError(VM_NONE, VCORE_NONE, "Expansion ROM update not handled. Will appear to not Exist\n"); - return length; + return 0; } +static int cmd_write(struct pci_device * pci_dev, uint32_t offset, + void * src, uint_t length, void * private_data) { + + PrintDebug(VM_NONE, VCORE_NONE, "PCI command update!\n"); + + int i = 0; + + struct pci_cmd_reg old_cmd; + struct pci_cmd_reg new_cmd; + if (pci_dev->config_write) { + pci_dev->config_write(pci_dev, offset, src, length, pci_dev->priv_data); + } + old_cmd.val = pci_dev->config_header.command; + + for (i = 0; i < length; i++) { + uint8_t mask = pci_hdr_write_mask_00[offset + i]; + uint8_t new_val = *(uint8_t *)(src + i); + uint8_t old_val = pci_dev->config_space[offset + i]; + + pci_dev->config_space[offset + i] = ((new_val & mask) | (old_val & ~mask)); + } + + new_cmd.val = pci_dev->config_header.command; + + if (pci_dev->cmd_update) { + if ((new_cmd.intx_disable == 1) && (old_cmd.intx_disable == 0)) { + pci_dev->irq_type = IRQ_NONE; + pci_dev->cmd_update(pci_dev, PCI_CMD_INTX_DISABLE, 0, pci_dev->priv_data); + } else if ((new_cmd.intx_disable == 0) && (old_cmd.intx_disable == 1)) { + pci_dev->irq_type = IRQ_INTX; + pci_dev->cmd_update(pci_dev, PCI_CMD_INTX_ENABLE, 0, pci_dev->priv_data); + } + + + if ((new_cmd.dma_enable == 1) && (old_cmd.dma_enable == 0)) { + pci_dev->cmd_update(pci_dev, PCI_CMD_DMA_ENABLE, 0, pci_dev->priv_data); + } else if ((new_cmd.dma_enable == 0) && (old_cmd.dma_enable == 1)) { + pci_dev->cmd_update(pci_dev, PCI_CMD_DMA_DISABLE, 0, pci_dev->priv_data); + } + } -static int pci_reset_device(struct vm_device * dev) { - PrintDebug("pci: reset device\n"); return 0; } -static int pci_start_device(struct vm_device * dev) { - PrintDebug("pci: start device\n"); - return 0; +static void init_pci_busses(struct pci_internal * pci_state) { + int i; + + for (i = 0; i < PCI_BUS_COUNT; i++) { + pci_state->bus_list[i].bus_num = i; + pci_state->bus_list[i].devices.rb_node = NULL; + memset(pci_state->bus_list[i].dev_map, 0, sizeof(pci_state->bus_list[i].dev_map)); + } } -static int pci_stop_device(struct vm_device * dev) { - PrintDebug("pci: stop device\n"); +static int pci_free(struct pci_internal * pci_state) { + int i; + + + // cleanup devices + for (i = 0; i < PCI_BUS_COUNT; i++) { + struct pci_bus * bus = &(pci_state->bus_list[i]); + struct rb_node * node = v3_rb_first(&(bus->devices)); + struct pci_device * dev = NULL; + + while (node) { + dev = rb_entry(node, struct pci_device, dev_tree_node); + node = v3_rb_next(node); + + v3_rb_erase(&(dev->dev_tree_node), &(bus->devices)); + + // Free config range hooks + { + struct cfg_range_hook * hook = NULL; + struct cfg_range_hook * tmp = NULL; + list_for_each_entry_safe(hook, tmp, &(dev->cfg_hooks), list_node) { + list_del(&(hook->list_node)); + V3_Free(hook); + } + } + + // Free caps + { + struct pci_cap * cap = NULL; + struct pci_cap * tmp = NULL; + list_for_each_entry_safe(cap, tmp, &(dev->cfg_hooks), cap_node) { + list_del(&(cap->cap_node)); + V3_Free(cap); + } + } + + V3_Free(dev); + } + + } + + V3_Free(pci_state); return 0; } +#ifdef V3_CONFIG_CHECKPOINT +#include -static int pci_free(struct vm_device * dev) { - int i = 0; - - for (i = 0; i < 4; i++){ - v3_dev_unhook_io(dev, CONFIG_ADDR_PORT + i); - v3_dev_unhook_io(dev, CONFIG_DATA_PORT + i); +static int pci_save_extended(struct v3_chkpt *chkpt, char *id, void * private_data) { + struct pci_internal * pci = (struct pci_internal *)private_data; + struct v3_chkpt_ctx *ctx=0; + char buf[128]; + int i = 0; + + ctx = v3_chkpt_open_ctx(chkpt,id); + + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Unable to open base context on save\n"); + goto savefailout; } - + + V3_CHKPT_SAVE(ctx, "ADDR_REG", pci->addr_reg.val, savefailout); + V3_CHKPT_SAVE(ctx, "IO_BASE", pci->dev_io_base, savefailout); + + v3_chkpt_close_ctx(ctx); ctx=0; + + for (i = 0; i < PCI_BUS_COUNT; i++) { + struct pci_bus * bus = &(pci->bus_list[i]); + struct rb_node * node = v3_rb_first(&(bus->devices)); + struct pci_device * dev = NULL; + + snprintf(buf, 128, "%s-%d", id, i); + + ctx = v3_chkpt_open_ctx(chkpt, buf); + + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Failed to open context for %s\n", buf); + goto savefailout; + } + + // nothing actually saved on the bus context... (later expansion) + + v3_chkpt_close_ctx(ctx); ctx=0; + + while (node) { + int bar_idx = 0; + dev = rb_entry(node, struct pci_device, dev_tree_node); + + snprintf(buf, 128, "%s-%d.%d-%d", id, i, dev->dev_num, dev->fn_num); + + ctx = v3_chkpt_open_ctx(chkpt, buf); + + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Failed to open context for device\n"); + goto savefailout; + } + + V3_CHKPT_SAVE(ctx, "CONFIG_SPACE", dev->config_space, savefailout); + + for (bar_idx = 0; bar_idx < 6; bar_idx++) { + snprintf(buf, 128, "BAR-%d", bar_idx); + V3_CHKPT_SAVE(ctx, buf, dev->bar[bar_idx].val, savefailout); + } + + v3_chkpt_close_ctx(ctx); ctx=0; + + node = v3_rb_next(node); + } + } + +// goodout: + return 0; + + savefailout: + PrintError(VM_NONE, VCORE_NONE, "Failed to save PCI\n"); + if (ctx) { v3_chkpt_close_ctx(ctx); } + return -1; + } +static int pci_load_extended(struct v3_chkpt *chkpt, char *id, void * private_data) { + struct pci_internal * pci = (struct pci_internal *)private_data; + struct v3_chkpt_ctx *ctx=0; + char buf[128]; + int i = 0; + + ctx = v3_chkpt_open_ctx(chkpt,id); -static void init_pci_busses(struct pci_internal * pci_state) { - int i; + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Unable to open base context on load\n"); + goto loadfailout; + } + + V3_CHKPT_LOAD(ctx, "ADDR_REG", pci->addr_reg.val, loadfailout); + V3_CHKPT_LOAD(ctx, "IO_BASE", pci->dev_io_base, loadfailout); + + v3_chkpt_close_ctx(ctx); ctx=0; for (i = 0; i < PCI_BUS_COUNT; i++) { - pci_state->bus_list[i].bus_num = i; - pci_state->bus_list[i].devices.rb_node = NULL; - memset(pci_state->bus_list[i].dev_map, 0, sizeof(pci_state->bus_list[i].dev_map)); + struct pci_bus * bus = &(pci->bus_list[i]); + struct rb_node * node = v3_rb_first(&(bus->devices)); + struct pci_device * dev = NULL; + + snprintf(buf, 128, "pci-%d", i); + + ctx = v3_chkpt_open_ctx(chkpt, buf); + + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Failed to open context for %s\n", buf); + goto loadfailout; + } + + // nothing actually saved on the bus context... (later expansion) + + v3_chkpt_close_ctx(ctx); ctx=0; + + while (node) { + int bar_idx = 0; + dev = rb_entry(node, struct pci_device, dev_tree_node); + + snprintf(buf, 128, "pci-%d.%d-%d", i, dev->dev_num, dev->fn_num); + + ctx = v3_chkpt_open_ctx(chkpt, buf); + + if (!ctx) { + PrintError(VM_NONE, VCORE_NONE, "Failed to open context for device\n"); + goto loadfailout; + } + + V3_CHKPT_LOAD(ctx, "CONFIG_SPACE", dev->config_space, loadfailout); + + for (bar_idx = 0; bar_idx < 6; bar_idx++) { + snprintf(buf, 128, "BAR-%d", bar_idx); + V3_CHKPT_LOAD(ctx, buf, dev->bar[bar_idx].val, loadfailout); + } + + v3_chkpt_close_ctx(ctx); ctx=0; + + node = v3_rb_next(node); + } } + +// goodout: + return 0; + + loadfailout: + PrintError(VM_NONE, VCORE_NONE, "Failed to load PCI\n"); + if (ctx) { v3_chkpt_close_ctx(ctx); } + return -1; + } +#endif + + static struct v3_device_ops dev_ops = { - .free = pci_free, - .reset = pci_reset_device, - .start = pci_start_device, - .stop = pci_stop_device, + .free = (int (*)(void *))pci_free, +#ifdef V3_CONFIG_CHECKPOINT + .save_extended = pci_save_extended, + .load_extended = pci_load_extended +#endif }; -static int pci_init(struct guest_info * vm, void * cfg_data) { +static int pci_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { struct pci_internal * pci_state = V3_Malloc(sizeof(struct pci_internal)); + + if (!pci_state) { + PrintError(vm, VCORE_NONE, "Cannot allocate in init\n"); + return -1; + } + int i = 0; + char * dev_id = v3_cfg_val(cfg, "ID"); + int ret = 0; - PrintDebug("PCI internal at %p\n",(void *)pci_state); + PrintDebug(vm, VCORE_NONE, "PCI internal at %p\n",(void *)pci_state); - struct vm_device * dev = v3_allocate_device("PCI", &dev_ops, pci_state); + struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, pci_state); - if (v3_attach_device(vm, dev) == -1) { - PrintError("Could not attach device %s\n", "PCI"); + if (dev == NULL) { + PrintError(vm, VCORE_NONE, "Could not attach device %s\n", dev_id); + V3_Free(pci_state); return -1; } @@ -639,11 +1311,17 @@ static int pci_init(struct guest_info * vm, void * cfg_data) { init_pci_busses(pci_state); - PrintDebug("Sizeof config header=%d\n", (int)sizeof(struct pci_config_header)); + PrintDebug(vm, VCORE_NONE, "Sizeof config header=%d\n", (int)sizeof(struct pci_config_header)); for (i = 0; i < 4; i++) { - v3_dev_hook_io(dev, CONFIG_ADDR_PORT + i, &addr_port_read, &addr_port_write); - v3_dev_hook_io(dev, CONFIG_DATA_PORT + i, &data_port_read, &data_port_write); + ret |= v3_dev_hook_io(dev, CONFIG_ADDR_PORT + i, &addr_port_read, &addr_port_write); + ret |= v3_dev_hook_io(dev, CONFIG_DATA_PORT + i, &data_port_read, &data_port_write); + } + + if (ret != 0) { + PrintError(vm, VCORE_NONE, "Error hooking PCI IO ports\n"); + v3_remove_device(dev); + return -1; } return 0; @@ -653,82 +1331,94 @@ static int pci_init(struct guest_info * vm, void * cfg_data) { device_register("PCI", pci_init) -static inline int init_bars(struct pci_device * pci_dev) { +static inline int init_bars(struct v3_vm_info * vm, struct pci_device * pci_dev) { int i = 0; for (i = 0; i < 6; i++) { int bar_offset = 0x10 + (4 * i); + struct v3_pci_bar * bar = &(pci_dev->bar[i]); - if (pci_dev->bar[i].type == PCI_BAR_IO) { + if (bar->type == PCI_BAR_IO) { int j = 0; - pci_dev->bar[i].mask = (~((pci_dev->bar[i].num_ports) - 1)) | 0x01; + bar->mask = (~((bar->num_ports) - 1)) | 0x01; - if (pci_dev->bar[i].default_base_port != 0xffff) { - pci_dev->bar[i].val = pci_dev->bar[i].default_base_port & pci_dev->bar[i].mask; + if (bar->default_base_port != 0xffff) { + bar->val = bar->default_base_port & bar->mask; } else { - pci_dev->bar[i].val = 0; + bar->val = 0; } - pci_dev->bar[i].val |= 0x00000001; + bar->val |= 0x00000001; - for (j = 0; j < pci_dev->bar[i].num_ports; j++) { + for (j = 0; j < bar->num_ports; j++) { // hook IO - if (pci_dev->bar[i].default_base_port != 0xffff) { - if (v3_hook_io_port(pci_dev->vm_dev->vm, pci_dev->bar[i].default_base_port + j, - pci_dev->bar[i].io_read, pci_dev->bar[i].io_write, - pci_dev->bar[i].private_data) == -1) { - PrintError("Could not hook default io port %x\n", pci_dev->bar[i].default_base_port + j); + if (bar->default_base_port != 0xffff) { + if (v3_hook_io_port(vm, bar->default_base_port + j, + bar->io_read, bar->io_write, + bar->private_data) == -1) { + PrintError(vm, VCORE_NONE, "Could not hook default io port %x\n", bar->default_base_port + j); return -1; } } } - *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val; + *(uint32_t *)(pci_dev->config_space + bar_offset) = bar->val; + + } else if (bar->type == PCI_BAR_MEM32) { + bar->mask = ~((bar->num_pages << 12) - 1); + bar->mask |= 0xf; // preserve the configuration flags - } else if (pci_dev->bar[i].type == PCI_BAR_MEM32) { - pci_dev->bar[i].mask = ~((pci_dev->bar[i].num_pages << 12) - 1); - pci_dev->bar[i].mask |= 0xf; // preserve the configuration flags - if (pci_dev->bar[i].default_base_addr != 0xffffffff) { - pci_dev->bar[i].val = pci_dev->bar[i].default_base_addr & pci_dev->bar[i].mask; + if (bar->default_base_addr != 0xffffffff) { + bar->val = bar->default_base_addr & bar->mask; } else { - pci_dev->bar[i].val = 0; + bar->val = 0; } // hook memory - if (pci_dev->bar[i].mem_read) { + if (bar->mem_read) { // full hook - v3_hook_full_mem(pci_dev->vm_dev->vm, pci_dev->bar[i].default_base_addr, - pci_dev->bar[i].default_base_addr + (pci_dev->bar[i].num_pages * PAGE_SIZE_4KB), - pci_dev->bar[i].mem_read, pci_dev->bar[i].mem_write, pci_dev->vm_dev); - } else if (pci_dev->bar[i].mem_write) { + v3_hook_full_mem(vm, V3_MEM_CORE_ANY, bar->default_base_addr, + bar->default_base_addr + (bar->num_pages * PAGE_SIZE_4KB), + bar->mem_read, bar->mem_write, pci_dev->priv_data); + } else if (bar->mem_write) { // write hook - PrintError("Write hooks not supported for PCI devices\n"); + PrintError(vm, VCORE_NONE, "Write hooks not supported for PCI devices\n"); return -1; /* - v3_hook_write_mem(pci_dev->vm_dev->vm, pci_dev->bar[i].default_base_addr, - pci_dev->bar[i].default_base_addr + (pci_dev->bar[i].num_pages * PAGE_SIZE_4KB), - pci_dev->bar[i].mem_write, pci_dev->vm_dev); + v3_hook_write_mem(pci_dev->vm_dev->vm, bar->default_base_addr, + bar->default_base_addr + (bar->num_pages * PAGE_SIZE_4KB), + bar->mem_write, pci_dev->vm_dev); */ } else { // set the prefetchable flag... - pci_dev->bar[i].val |= 0x00000008; + bar->val |= 0x00000008; } - *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val; + *(uint32_t *)(pci_dev->config_space + bar_offset) = bar->val; - } else if (pci_dev->bar[i].type == PCI_BAR_MEM16) { - PrintError("16 Bit memory ranges not supported (reg: %d)\n", i); + } else if (bar->type == PCI_BAR_MEM24) { + PrintError(vm, VCORE_NONE, "16 Bit memory ranges not supported (reg: %d)\n", i); return -1; - } else if (pci_dev->bar[i].type == PCI_BAR_NONE) { - pci_dev->bar[i].val = 0x00000000; - pci_dev->bar[i].mask = 0x00000000; // This ensures that all updates will be dropped - *(uint32_t *)(pci_dev->config_space + bar_offset) = pci_dev->bar[i].val; + } else if (bar->type == PCI_BAR_NONE) { + bar->val = 0x00000000; + bar->mask = 0x00000000; // This ensures that all updates will be dropped + *(uint32_t *)(pci_dev->config_space + bar_offset) = bar->val; + } else if (bar->type == PCI_BAR_PASSTHROUGH) { + + // Call the bar init function to get the local cached value + bar->bar_init(i, &(bar->val), bar->private_data); + + // Copy back changes it made + *(uint32_t *)(pci_dev->config_space + bar_offset) = bar->val; + } else { - PrintError("Invalid BAR type for bar #%d\n", i); + PrintError(vm, VCORE_NONE, "Invalid BAR type for bar #%d\n", i); return -1; } + + v3_pci_hook_config_range(pci_dev, bar_offset, 4, bar_update, NULL, bar); } return 0; @@ -736,33 +1426,142 @@ static inline int init_bars(struct pci_device * pci_dev) { int v3_pci_set_irq_bridge(struct vm_device * pci_bus, int bus_num, - int (*raise_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev), - int (*lower_pci_irq)(struct vm_device * dev, struct pci_device * pci_dev), - struct vm_device * bridge_dev) { + int (*raise_pci_irq)(struct pci_device * pci_dev, void * dev_data, struct v3_irq * vec), + int (*lower_pci_irq)(struct pci_device * pci_dev, void * dev_data, struct v3_irq * vec), + void * priv_data) { struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data; pci_state->bus_list[bus_num].raise_pci_irq = raise_pci_irq; pci_state->bus_list[bus_num].lower_pci_irq = lower_pci_irq; - pci_state->bus_list[bus_num].irq_bridge_dev = bridge_dev; + pci_state->bus_list[bus_num].irq_dev_data = priv_data; return 0; } -int v3_pci_raise_irq(struct vm_device * pci_bus, int bus_num, struct pci_device * dev) { - struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data; - struct pci_bus * bus = &(pci_state->bus_list[bus_num]); +int v3_pci_raise_irq(struct vm_device * pci_bus, struct pci_device * dev, uint32_t vec_index) { + struct v3_irq vec; + + vec.ack = NULL; + vec.private_data = NULL; + vec.irq = vec_index; - return bus->raise_pci_irq(bus->irq_bridge_dev, dev); + return v3_pci_raise_acked_irq(pci_bus, dev, vec); } -int v3_pci_lower_irq(struct vm_device * pci_bus, int bus_num, struct pci_device * dev) { +int v3_pci_lower_irq(struct vm_device * pci_bus, struct pci_device * dev, uint32_t vec_index) { + struct v3_irq vec; + + vec.irq = vec_index; + vec.ack = NULL; + vec.private_data = NULL; + + return v3_pci_lower_acked_irq(pci_bus, dev, vec); +} + +int v3_pci_raise_acked_irq(struct vm_device * pci_bus, struct pci_device * dev, struct v3_irq vec) { struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data; - struct pci_bus * bus = &(pci_state->bus_list[bus_num]); + struct pci_bus * bus = &(pci_state->bus_list[dev->bus_num]); + + + if (dev->irq_type == IRQ_INTX) { + return bus->raise_pci_irq(dev, bus->irq_dev_data, &vec); + } else if (dev->irq_type == IRQ_MSI) { + struct v3_gen_ipi ipi; + struct msi_addr * addr = NULL; + struct msi_data * data = NULL; + + if (dev->msi_cap->cap_64bit) { + if (dev->msi_cap->per_vect_mask) { + struct msi64_pervec_msg_addr * msi = (void *)dev->msi_cap; + addr = &(msi->addr); + data = &(msi->data); + } else { + struct msi64_msg_addr * msi = (void *)dev->msi_cap; + addr = &(msi->addr); + data = &(msi->data); + } + } else { + struct msi32_msg_addr * msi = (void *)dev->msi_cap; + addr = &(msi->addr); + data = &(msi->data); + } + + memset(&ipi, 0, sizeof(struct v3_gen_ipi)); + + // decode MSI fields into IPI + + ipi.vector = data->vector + vec.irq; + ipi.mode = data->del_mode; + ipi.logical = addr->dst_mode; + ipi.trigger_mode = data->trig_mode; + ipi.dst_shorthand = 0; + ipi.dst = addr->dst_id; + + + v3_apic_send_ipi(dev->vm, &ipi, dev->apic_dev); + + return 0; + } else if (dev->irq_type == IRQ_MSIX) { + addr_t msix_table_gpa = 0; + struct msix_table * msix_table = NULL; + uint_t bar_idx = dev->msix_cap->bir; + struct v3_gen_ipi ipi; + struct msi_addr * addr = NULL; + struct msi_data * data = NULL; + + if (dev->bar[bar_idx].type != PCI_BAR_MEM32) { + PrintError(VM_NONE, VCORE_NONE, "Non 32bit MSIX BAR registers are not supported\n"); + return -1; + } + + msix_table_gpa = dev->bar[bar_idx].val; + msix_table_gpa += dev->msix_cap->table_offset; + + if (v3_gpa_to_hva(&(dev->vm->cores[0]), msix_table_gpa, (void *)&(msix_table)) != 0) { + PrintError(VM_NONE, VCORE_NONE, "Could not translate MSIX Table GPA (%p)\n", (void *)msix_table_gpa); + return -1; + } + + memset(&ipi, 0, sizeof(struct v3_gen_ipi)); + + data = &(msix_table->entries[vec.irq].data); + addr = &(msix_table->entries[vec.irq].addr);; + + // decode MSIX fields into IPI + ipi.vector = data->vector + vec.irq; + ipi.mode = data->del_mode; + ipi.logical = addr->dst_mode; + ipi.trigger_mode = data->trig_mode; + ipi.dst_shorthand = 0; + ipi.dst = addr->dst_id; + + + + V3_Print(VM_NONE, VCORE_NONE, "Decode MSIX\n"); + + v3_apic_send_ipi(dev->vm, &ipi, dev->apic_dev); + + return 0; + } + + // Should never get here + return -1; + +} - return bus->lower_pci_irq(bus->irq_bridge_dev, dev); +int v3_pci_lower_acked_irq(struct vm_device * pci_bus, struct pci_device * dev, struct v3_irq vec) { + if (dev->irq_type == IRQ_INTX) { + struct pci_internal * pci_state = (struct pci_internal *)pci_bus->private_data; + struct pci_bus * bus = &(pci_state->bus_list[dev->bus_num]); + + return bus->lower_pci_irq(dev, bus->irq_dev_data, &vec); + } else { + return -1; + } } + // if dev_num == -1, auto assign struct pci_device * v3_pci_register_device(struct vm_device * pci, pci_device_type_t dev_type, @@ -771,10 +1570,13 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, int fn_num, const char * name, struct v3_pci_bar * bars, - int (*config_update)(struct pci_device * pci_dev, uint_t reg_num, int length), - int (*cmd_update)(struct pci_device *pci_dev, uchar_t io_enabled, uchar_t mem_enabled), - int (*ext_rom_update)(struct pci_device * pci_dev), - struct vm_device * dev) { + int (*config_write)(struct pci_device * pci_dev, uint32_t reg_num, void * src, + uint_t length, void * priv_data), + int (*config_read)(struct pci_device * pci_dev, uint32_t reg_num, void * dst, + uint_t length, void * priv_data), + int (*cmd_update)(struct pci_device * pci_dev, pci_cmd_t cmd, uint64_t arg, void * priv_data), + int (*exp_rom_update)(struct pci_device * pci_dev, uint32_t * src, void * priv_data), + void * priv_data) { struct pci_internal * pci_state = (struct pci_internal *)pci->private_data; struct pci_bus * bus = &(pci_state->bus_list[bus_num]); @@ -782,22 +1584,23 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, int i; if (dev_num > MAX_BUS_DEVICES) { - PrintError("Requested Invalid device number (%d)\n", dev_num); + PrintError(VM_NONE, VCORE_NONE, "Requested Invalid device number (%d)\n", dev_num); return NULL; } if (dev_num == PCI_AUTO_DEV_NUM) { - PrintDebug("Searching for free device number\n"); + PrintDebug(VM_NONE, VCORE_NONE, "Searching for free device number\n"); if ((dev_num = get_free_dev_num(bus)) == -1) { - PrintError("No more available PCI slots on bus %d\n", bus->bus_num); + PrintError(VM_NONE, VCORE_NONE, "No more available PCI slots on bus %d\n", bus->bus_num); return NULL; } + V3_Print(VM_NONE, VCORE_NONE,"assigning dev num %d to device (%s, busnum=%d,fnnum=%d)\n", dev_num, name, bus->bus_num, fn_num); } - PrintDebug("Checking for PCI Device\n"); + PrintDebug(VM_NONE, VCORE_NONE, "Checking for PCI Device\n"); if (get_device(bus, dev_num, fn_num) != NULL) { - PrintError("PCI Device already registered at slot %d on bus %d\n", + PrintError(VM_NONE, VCORE_NONE, "PCI Device already registered at slot %d on bus %d\n", dev_num, bus->bus_num); return NULL; } @@ -806,14 +1609,16 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, pci_dev = (struct pci_device *)V3_Malloc(sizeof(struct pci_device)); if (pci_dev == NULL) { - PrintError("Could not allocate pci device\n"); + PrintError(VM_NONE, VCORE_NONE, "Could not allocate pci device\n"); return NULL; } memset(pci_dev, 0, sizeof(struct pci_device)); + + pci_dev->type = dev_type; - switch (dev_type) { + switch (pci_dev->type) { case PCI_STD_DEVICE: pci_dev->config_header.header_type = 0x00; break; @@ -821,23 +1626,65 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, pci_dev->config_header.header_type = 0x80; break; default: - PrintError("Unhandled PCI Device Type: %d\n", dev_type); + PrintError(VM_NONE, VCORE_NONE, "Unhandled PCI Device Type: %d\n", dev_type); return NULL; } + + pci_dev->bus_num = bus_num; pci_dev->dev_num = dev_num; pci_dev->fn_num = fn_num; strncpy(pci_dev->name, name, sizeof(pci_dev->name)); - pci_dev->vm_dev = dev; + pci_dev->name[sizeof(pci_dev->name)-1] = 0; + pci_dev->vm = pci->vm; + pci_dev->priv_data = priv_data; + + INIT_LIST_HEAD(&(pci_dev->cfg_hooks)); + INIT_LIST_HEAD(&(pci_dev->capabilities)); + + + { + // locate APIC for MSI/MSI-X + pci_dev->apic_dev = v3_find_dev(pci->vm, "apic"); + } // register update callbacks - pci_dev->config_update = config_update; + pci_dev->config_write = config_write; + pci_dev->config_read = config_read; pci_dev->cmd_update = cmd_update; - pci_dev->ext_rom_update = ext_rom_update; + pci_dev->exp_rom_update = exp_rom_update; + + if (config_read) { + int i = 0; + + // Only 256 bytes for now, should expand it in the future + for (i = 0; i < 256; i++) { + config_read(pci_dev, i, &(pci_dev->config_space[i]), 1, pci_dev->priv_data); + } + } + + V3_Print(VM_NONE, VCORE_NONE, "Scanning for Capabilities\n"); + + // scan for caps + scan_pci_caps(pci_dev); + + pci_dev->irq_type = IRQ_INTX; + + V3_Print(VM_NONE, VCORE_NONE, "Caps scanned\n"); + + // hook important regions + v3_pci_hook_config_range(pci_dev, 0x30, 4, exp_rom_write, NULL, NULL); // ExpRom + v3_pci_hook_config_range(pci_dev, 0x04, 2, cmd_write, NULL, NULL); // CMD Reg + // * Status resets + // * Drop BIST + // + + + //copy bars for (i = 0; i < 6; i ++) { pci_dev->bar[i].type = bars[i].type; @@ -862,6 +1709,9 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, pci_dev->bar[i].default_base_addr = bars[i].default_base_addr; pci_dev->bar[i].mem_read = bars[i].mem_read; pci_dev->bar[i].mem_write = bars[i].mem_write; + } else if (pci_dev->bar[i].type == PCI_BAR_PASSTHROUGH) { + pci_dev->bar[i].bar_init = bars[i].bar_init; + pci_dev->bar[i].bar_write = bars[i].bar_write; } else { pci_dev->bar[i].num_pages = 0; pci_dev->bar[i].default_base_addr = 0; @@ -870,17 +1720,18 @@ struct pci_device * v3_pci_register_device(struct vm_device * pci, } } - if (init_bars(pci_dev) == -1) { - PrintError("could not initialize bar registers\n"); + if (init_bars(pci->vm, pci_dev) == -1) { + PrintError(VM_NONE, VCORE_NONE, "could not initialize bar registers\n"); return NULL; } // add the device add_device_to_bus(bus, pci_dev); -#ifdef CONFIG_DEBUG_PCI +#ifdef V3_CONFIG_DEBUG_PCI pci_dump_state(pci_state); #endif return pci_dev; } +