From: Jack Lange Date: Tue, 17 Apr 2012 22:19:30 +0000 (-0400) Subject: added host_pci passthrough PCI support X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=ffd6d915b585a4a17a2d5b081313f0968885a105 added host_pci passthrough PCI support --- diff --git a/linux_module/Makefile b/linux_module/Makefile index 98416cb..2236f81 100644 --- a/linux_module/Makefile +++ b/linux_module/Makefile @@ -43,6 +43,8 @@ v3vee-$(V3_CONFIG_EXT_ENV_INJECT) += iface-env-inject.o v3vee-$(V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT) += iface-syscall.o +v3vee-$(V3_CONFIG_HOST_PCI) += iface-host-pci.o + v3vee-objs := $(v3vee-y) ../libv3vee.a obj-m := v3vee.o diff --git a/linux_module/iface-host-pci-hw.h b/linux_module/iface-host-pci-hw.h new file mode 100644 index 0000000..b08d897 --- /dev/null +++ b/linux_module/iface-host-pci-hw.h @@ -0,0 +1,444 @@ +/* Linux host side PCI passthrough support + * Jack Lange , 2012 + */ + +#include +#include +#include +#include + + +#define PCI_HDR_SIZE 256 + + +static int setup_hw_pci_dev(struct host_pci_device * host_dev) { + int ret = 0; + struct pci_dev * dev = NULL; + struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev); + + dev = pci_get_bus_and_slot(host_dev->hw_dev.bus, + host_dev->hw_dev.devfn); + + + if (dev == NULL) { + printk("Could not find HW pci device (bus=%d, devfn=%d)\n", + host_dev->hw_dev.bus, host_dev->hw_dev.devfn); + return -1; + } + + // record pointer in dev state + host_dev->hw_dev.dev = dev; + + host_dev->hw_dev.intx_disabled = 1; + spin_lock_init(&(host_dev->hw_dev.intx_lock)); + + if (pci_enable_device(dev)) { + printk("Could not enable Device\n"); + return -1; + } + + ret = pci_request_regions(dev, "v3vee"); + if (ret != 0) { + printk("Could not reservce PCI regions\n"); + return -1; + } + + + pci_reset_function(host_dev->hw_dev.dev); + pci_save_state(host_dev->hw_dev.dev); + + + { + int i = 0; + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + printk("Resource %d\n", i); + printk("\tflags = 0x%lx\n", pci_resource_flags(dev, i)); + printk("\t name=%s, start=%lx, size=%d\n", + host_dev->hw_dev.dev->resource[i].name, (uintptr_t)pci_resource_start(dev, i), + (u32)pci_resource_len(dev, i)); + + } + + printk("Rom BAR=%d\n", dev->rom_base_reg); + } + + /* Cache first 6 BAR regs */ + { + int i = 0; + + for (i = 0; i < 6; i++) { + struct v3_host_pci_bar * bar = &(v3_dev->bars[i]); + unsigned long flags; + + bar->size = pci_resource_len(dev, i); + bar->addr = pci_resource_start(dev, i); + flags = pci_resource_flags(dev, i); + + if (flags & IORESOURCE_IO) { + bar->type = PT_BAR_IO; + } else if (flags & IORESOURCE_MEM) { + if (flags & IORESOURCE_MEM_64) { + printk("ERROR: 64 Bit BARS not yet supported\n"); + bar->type = PT_BAR_NONE; + } else if (flags & IORESOURCE_DMA) { + bar->type = PT_BAR_MEM24; + } else { + bar->type = PT_BAR_MEM32; + } + + bar->cacheable = ((flags & IORESOURCE_CACHEABLE) != 0); + bar->prefetchable = ((flags & IORESOURCE_PREFETCH) != 0); + + } else { + bar->type = PT_BAR_NONE; + } + } + } + + /* Cache expansion rom bar */ + { + struct resource * rom_res = &(dev->resource[PCI_ROM_RESOURCE]); + int rom_size = pci_resource_len(dev, PCI_ROM_RESOURCE); + + if (rom_size > 0) { + unsigned long flags; + + v3_dev->exp_rom.size = rom_size; + v3_dev->exp_rom.addr = pci_resource_start(dev, PCI_ROM_RESOURCE); + flags = pci_resource_flags(dev, PCI_ROM_RESOURCE); + + v3_dev->exp_rom.type = PT_EXP_ROM; + + v3_dev->exp_rom.exp_rom_enabled = rom_res->flags & IORESOURCE_ROM_ENABLE; + } + } + + /* Cache entire configuration space */ + { + int m = 0; + + // Copy the configuration space to the local cached version + for (m = 0; m < PCI_HDR_SIZE; m += 4) { + pci_read_config_dword(dev, m, (u32 *)&(v3_dev->cfg_space[m])); + } + } + + + /* HARDCODED for now but this will need to depend on IOMMU support detection */ + if (iommu_found()) { + printk("Setting host PCI device (%s) as IOMMU\n", host_dev->name); + v3_dev->iface = IOMMU; + } else { + printk("Setting host PCI device (%s) as SYMBIOTIC\n", host_dev->name); + v3_dev->iface = SYMBIOTIC; + } + + return 0; + +} + + + +static irqreturn_t host_pci_intx_irq_handler(int irq, void * priv_data) { + struct host_pci_device * host_dev = priv_data; + + // printk("Host PCI IRQ handler (%d)\n", irq); + + spin_lock(&(host_dev->hw_dev.intx_lock)); + disable_irq_nosync(irq); + host_dev->hw_dev.intx_disabled = 1; + spin_unlock(&(host_dev->hw_dev.intx_lock)); + + V3_host_pci_raise_irq(&(host_dev->v3_dev), 0); + + return IRQ_HANDLED; +} + + + +static irqreturn_t host_pci_msi_irq_handler(int irq, void * priv_data) { + struct host_pci_device * host_dev = priv_data; + // printk("Host PCI MSI IRQ Handler (%d)\n", irq); + + V3_host_pci_raise_irq(&(host_dev->v3_dev), 0); + + return IRQ_HANDLED; +} + +static irqreturn_t host_pci_msix_irq_handler(int irq, void * priv_data) { + struct host_pci_device * host_dev = priv_data; + int i = 0; + + // printk("Host PCI MSIX IRQ Handler (%d)\n", irq); + + // find vector index + for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) { + if (irq == host_dev->hw_dev.msix_entries[i].vector) { + V3_host_pci_raise_irq(&(host_dev->v3_dev), i); + } else { + printk("Error Could not find matching MSIX vector for IRQ %d\n", irq); + } + } + return IRQ_HANDLED; +} + + +static int hw_pci_cmd(struct host_pci_device * host_dev, host_pci_cmd_t cmd, u64 arg) { + //struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev); + struct pci_dev * dev = host_dev->hw_dev.dev; + + switch (cmd) { + case HOST_PCI_CMD_DMA_DISABLE: + printk("Passthrough PCI device disabling BMDMA\n"); + pci_clear_master(host_dev->hw_dev.dev); + break; + case HOST_PCI_CMD_DMA_ENABLE: + printk("Passthrough PCI device Enabling BMDMA\n"); + pci_set_master(host_dev->hw_dev.dev); + break; + + case HOST_PCI_CMD_INTX_DISABLE: + printk("Passthrough PCI device disabling INTx IRQ\n"); + + disable_irq(dev->irq); + free_irq(dev->irq, (void *)host_dev); + + break; + case HOST_PCI_CMD_INTX_ENABLE: + printk("Passthrough PCI device Enabling INTx IRQ\n"); + + if (request_threaded_irq(dev->irq, NULL, host_pci_intx_irq_handler, + IRQF_ONESHOT, "V3Vee_Host_PCI_INTx", (void *)host_dev)) { + printk("ERROR Could not assign IRQ to host PCI device (%s)\n", host_dev->name); + } + + break; + + case HOST_PCI_CMD_MSI_DISABLE: + printk("Passthrough PCI device Disabling MSIs\n"); + + disable_irq(dev->irq); + free_irq(dev->irq, (void *)host_dev); + + pci_disable_msi(dev); + + break; + case HOST_PCI_CMD_MSI_ENABLE: + printk("Passthrough PCI device Enabling MSI\n"); + + if (!dev->msi_enabled) { + pci_enable_msi(dev); + + if (request_irq(dev->irq, host_pci_msi_irq_handler, + 0, "V3Vee_host_PCI_MSI", (void *)host_dev)) { + printk("Error Requesting IRQ %d for Passthrough MSI IRQ\n", dev->irq); + } + } + + break; + + + + case HOST_PCI_CMD_MSIX_ENABLE: { + int i = 0; + + printk("Passthrough PCI device Enabling MSIX\n"); + host_dev->hw_dev.num_msix_vecs = arg;; + host_dev->hw_dev.msix_entries = kcalloc(host_dev->hw_dev.num_msix_vecs, + sizeof(struct msix_entry), GFP_KERNEL); + + for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) { + host_dev->hw_dev.msix_entries[i].entry = i; + } + + pci_enable_msix(dev, host_dev->hw_dev.msix_entries, + host_dev->hw_dev.num_msix_vecs); + + for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) { + if (request_irq(host_dev->hw_dev.msix_entries[i].vector, + host_pci_msix_irq_handler, + 0, "V3VEE_host_PCI_MSIX", (void *)host_dev)) { + printk("Error requesting IRQ %d for Passthrough MSIX IRQ\n", + host_dev->hw_dev.msix_entries[i].vector); + } + } + + break; + } + + case HOST_PCI_CMD_MSIX_DISABLE: { + int i = 0; + + printk("Passthrough PCI device Disabling MSIX\n"); + + for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) { + disable_irq(host_dev->hw_dev.msix_entries[i].vector); + } + + for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) { + free_irq(host_dev->hw_dev.msix_entries[i].vector, (void *)host_dev); + } + + host_dev->hw_dev.num_msix_vecs = 0; + kfree(host_dev->hw_dev.msix_entries); + + pci_disable_msix(dev); + + break; + } + default: + printk("Error: unhandled passthrough PCI command: %d\n", cmd); + return -1; + + } + + return 0; +} + + +static int hw_ack_irq(struct host_pci_device * host_dev, u32 vector) { + struct pci_dev * dev = host_dev->hw_dev.dev; + unsigned long flags; + + // printk("Acking IRQ vector %d\n", vector); + + spin_lock_irqsave(&(host_dev->hw_dev.intx_lock), flags); + // printk("Enabling IRQ %d\n", dev->irq); + enable_irq(dev->irq); + host_dev->hw_dev.intx_disabled = 0; + spin_unlock_irqrestore(&(host_dev->hw_dev.intx_lock), flags); + + return 0; +} + + + + +static int reserve_hw_pci_dev(struct host_pci_device * host_dev, void * v3_ctx) { + int ret = 0; + unsigned long flags; + struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev); + struct pci_dev * dev = host_dev->hw_dev.dev; + + spin_lock_irqsave(&lock, flags); + if (host_dev->hw_dev.in_use == 0) { + host_dev->hw_dev.in_use = 1; + } else { + ret = -1; + } + spin_unlock_irqrestore(&lock, flags); + + + if (v3_dev->iface == IOMMU) { + struct v3_guest_mem_region region; + int flags = 0; + + host_dev->hw_dev.iommu_domain = iommu_domain_alloc(); + + if (V3_get_guest_mem_region(v3_ctx, ®ion) == -1) { + printk("Error getting VM memory region for IOMMU support\n"); + return -1; + } + + printk("Memory region: start=%p, end=%p\n", (void *)region.start, (void *)region.end); + + + flags = IOMMU_READ | IOMMU_WRITE; // Need to see what IOMMU_CACHE means + + /* This version could be wrong */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) + // Guest VAs start at zero and go to end of memory + iommu_map_range(host_dev->hw_dev.iommu_domain, 0, region.start, (region.end - region.start), flags); +#else + /* Linux actually made the interface worse... Now you can only map memory in powers of 2 (meant to only be pages...) */ + { + u64 size = region.end - region.start; + u32 page_size = 512 * 4096; // assume large 64bit pages (2MB) + u64 dpa = 0; // same as gpa + u64 hpa = region.start; + + do { + if (size < page_size) { + page_size = 4096; // less than a 2MB granularity, so we switch to small pages (4KB) + } + + printk("Mapping IOMMU region dpa=%p hpa=%p (size=%d)\n", (void *)dpa, (void *)hpa, page_size); + + if (iommu_map(host_dev->hw_dev.iommu_domain, dpa, hpa, + get_order(page_size), flags)) { + printk("ERROR: Could not map sub region (DPA=%p) (HPA=%p) (order=%d)\n", + (void *)dpa, (void *)hpa, get_order(page_size)); + break; + } + + hpa += page_size; + dpa += page_size; + + size -= page_size; + } while (size); + } +#endif + + if (iommu_attach_device(host_dev->hw_dev.iommu_domain, &(dev->dev))) { + printk("ERROR attaching host PCI device to IOMMU domain\n"); + } + + } + + + printk("Requesting Threaded IRQ handler for IRQ %d\n", dev->irq); + // setup regular IRQs until advanced IRQ mechanisms are enabled + if (request_threaded_irq(dev->irq, NULL, host_pci_intx_irq_handler, + IRQF_ONESHOT, "V3Vee_Host_PCI_INTx", (void *)host_dev)) { + printk("ERROR Could not assign IRQ to host PCI device (%s)\n", host_dev->name); + } + + + + + return ret; +} + + + +static int write_hw_pci_config(struct host_pci_device * host_dev, u32 reg, void * data, u32 length) { + struct pci_dev * dev = host_dev->hw_dev.dev; + + if (reg < 64) { + return 0; + } + + if (length == 1) { + pci_write_config_byte(dev, reg, *(u8 *)data); + } else if (length == 2) { + pci_write_config_word(dev, reg, *(u16 *)data); + } else if (length == 4) { + pci_write_config_dword(dev, reg, *(u32 *)data); + } else { + printk("Invalid length of host PCI config update\n"); + return -1; + } + + return 0; +} + + + +static int read_hw_pci_config(struct host_pci_device * host_dev, u32 reg, void * data, u32 length) { + struct pci_dev * dev = host_dev->hw_dev.dev; + + + if (length == 1) { + pci_read_config_byte(dev, reg, data); + } else if (length == 2) { + pci_read_config_word(dev, reg, data); + } else if (length == 4) { + pci_read_config_dword(dev, reg, data); + } else { + printk("Invalid length of host PCI config read\n"); + return -1; + } + + + return 0; +} diff --git a/linux_module/iface-host-pci.c b/linux_module/iface-host-pci.c new file mode 100644 index 0000000..ce6ec7b --- /dev/null +++ b/linux_module/iface-host-pci.c @@ -0,0 +1,244 @@ +/* Host PCI interface + * (c) Jack Lange, 2012 + * jacklange@cs.pitt.edu + */ + +#include +#include +#include +#include + +#include "palacios.h" +#include "linux-exts.h" + + +#include + +static struct list_head device_list; +static spinlock_t lock; + + + + +struct pci_dev; +struct iommu_domain; + +struct host_pci_device { + char name[128]; + + enum {PASSTHROUGH, USER} type; + + enum {INTX_IRQ, MSI_IRQ, MSIX_IRQ} irq_type; + uint32_t num_vecs; + + union { + struct { + u8 in_use; + u8 iommu_enabled; + + u32 bus; + u32 devfn; + + spinlock_t intx_lock; + u8 intx_disabled; + + u32 num_msix_vecs; + struct msix_entry * msix_entries; + struct iommu_domain * iommu_domain; + + struct pci_dev * dev; + } hw_dev; + + // struct user_dev_state user_dev; + }; + + struct v3_host_pci_dev v3_dev; + + struct list_head dev_node; +}; + + +//#include "iface-host-pci-user.h" +#include "iface-host-pci-hw.h" + + +static struct host_pci_device * find_dev_by_name(char * name) { + struct host_pci_device * dev = NULL; + + list_for_each_entry(dev, &device_list, dev_node) { + if (strncmp(dev->name, name, 128) == 0) { + return dev; + } + } + + return NULL; +} + + + +static struct v3_host_pci_dev * request_pci_dev(char * url, void * v3_ctx) { + + unsigned long flags; + struct host_pci_device * host_dev = NULL; + + spin_lock_irqsave(&lock, flags); + host_dev = find_dev_by_name(url); + spin_unlock_irqrestore(&lock, flags); + + if (host_dev == NULL) { + printk("Could not find host device (%s)\n", url); + return NULL; + } + + if (host_dev->type == PASSTHROUGH) { + if (reserve_hw_pci_dev(host_dev, v3_ctx) == -1) { + printk("Could not reserve host device (%s)\n", url); + return NULL; + } + } else { + printk("Unsupported Host device type\n"); + return NULL; + } + + + + return &(host_dev->v3_dev); + +} + + +static int host_pci_config_write(struct v3_host_pci_dev * v3_dev, unsigned int reg_num, + void * src, unsigned int length) { + struct host_pci_device * host_dev = v3_dev->host_data; + + if (host_dev->type == PASSTHROUGH) { + return write_hw_pci_config(host_dev, reg_num, src, length); + } + + printk("Error in config write handler\n"); + return -1; +} + +static int host_pci_config_read(struct v3_host_pci_dev * v3_dev, unsigned int reg_num, + void * dst, unsigned int length) { + struct host_pci_device * host_dev = v3_dev->host_data; + + if (host_dev->type == PASSTHROUGH) { + return read_hw_pci_config(host_dev, reg_num, dst, length); + } + + printk("Error in config read handler\n"); + return -1; +} + + +static int host_pci_ack_irq(struct v3_host_pci_dev * v3_dev, unsigned int vector) { + struct host_pci_device * host_dev = v3_dev->host_data; + + if (host_dev->type == PASSTHROUGH) { + return hw_ack_irq(host_dev, vector); + } + + printk("Error in config irq ack handler\n"); + return -1; +} + + + +static int host_pci_cmd(struct v3_host_pci_dev * v3_dev, host_pci_cmd_t cmd, u64 arg) { + struct host_pci_device * host_dev = v3_dev->host_data; + + if (host_dev->type == PASSTHROUGH) { + return hw_pci_cmd(host_dev, cmd, arg); + } + + printk("Error in config pci cmd handler\n"); + return -1; + +} + +static struct v3_host_pci_hooks pci_hooks = { + .request_device = request_pci_dev, + .config_write = host_pci_config_write, + .config_read = host_pci_config_read, + .ack_irq = host_pci_ack_irq, + .pci_cmd = host_pci_cmd, + +}; + + + +static int register_pci_hw_dev(unsigned int cmd, unsigned long arg) { + void __user * argp = (void __user *)arg; + struct v3_hw_pci_dev hw_dev_arg ; + struct host_pci_device * host_dev = NULL; + unsigned long flags; + int ret = 0; + + if (copy_from_user(&hw_dev_arg, argp, sizeof(struct v3_hw_pci_dev))) { + printk("%s(%d): copy from user error...\n", __FILE__, __LINE__); + return -EFAULT; + } + + host_dev = kzalloc(sizeof(struct host_pci_device), GFP_KERNEL); + + + strncpy(host_dev->name, hw_dev_arg.name, 128); + host_dev->v3_dev.host_data = host_dev; + + + host_dev->type = PASSTHROUGH; + host_dev->hw_dev.bus = hw_dev_arg.bus; + host_dev->hw_dev.devfn = PCI_DEVFN(hw_dev_arg.dev, hw_dev_arg.func); + + + spin_lock_irqsave(&lock, flags); + if (!find_dev_by_name(hw_dev_arg.name)) { + list_add(&(host_dev->dev_node), &device_list); + ret = 1; + } + spin_unlock_irqrestore(&lock, flags); + + if (ret == 0) { + // Error device already exists + kfree(host_dev); + return -EFAULT; + } + + + setup_hw_pci_dev(host_dev); + + return 0; +} + + +static int register_pci_user_dev(unsigned int cmd, unsigned long arg) { + return 0; +} + + + + +static int host_pci_init( void ) { + INIT_LIST_HEAD(&(device_list)); + spin_lock_init(&lock); + + V3_Init_Host_PCI(&pci_hooks); + + + add_global_ctrl(V3_ADD_PCI_HW_DEV, register_pci_hw_dev); + add_global_ctrl(V3_ADD_PCI_USER_DEV, register_pci_user_dev); + + return 0; +} + + + +static struct linux_ext host_pci_ext = { + .name = "HOST_PCI", + .init = host_pci_init, +}; + + + +register_extension(&host_pci_ext); diff --git a/linux_module/linux-exts.c b/linux_module/linux-exts.c index 089e67e..38e2e2b 100644 --- a/linux_module/linux-exts.c +++ b/linux_module/linux-exts.c @@ -1,15 +1,97 @@ - #include "linux-exts.h" /* * This is a place holder to ensure that the _lnx_exts section gets created by gcc */ - static struct {} null_ext __attribute__((__used__)) \ __attribute__((unused, __section__ ("_lnx_exts"), \ aligned(sizeof(void *)))); + + +/* */ +/* Global controls */ +/* */ + +struct rb_root global_ctrls; + +static inline struct global_ctrl * __insert_global_ctrl(struct global_ctrl * ctrl) { + struct rb_node ** p = &(global_ctrls.rb_node); + struct rb_node * parent = NULL; + struct global_ctrl * tmp_ctrl = NULL; + + while (*p) { + parent = *p; + tmp_ctrl = rb_entry(parent, struct global_ctrl, tree_node); + + if (ctrl->cmd < tmp_ctrl->cmd) { + p = &(*p)->rb_left; + } else if (ctrl->cmd > tmp_ctrl->cmd) { + p = &(*p)->rb_right; + } else { + return tmp_ctrl; + } + } + + rb_link_node(&(ctrl->tree_node), parent, p); + + return NULL; +} + + + +int add_global_ctrl(unsigned int cmd, + int (*handler)(unsigned int cmd, unsigned long arg)) { + struct global_ctrl * ctrl = kmalloc(sizeof(struct global_ctrl), GFP_KERNEL); + + if (ctrl == NULL) { + printk("Error: Could not allocate global ctrl %d\n", cmd); + return -1; + } + + ctrl->cmd = cmd; + ctrl->handler = handler; + + if (__insert_global_ctrl(ctrl) != NULL) { + printk("Could not insert guest ctrl %d\n", cmd); + kfree(ctrl); + return -1; + } + + rb_insert_color(&(ctrl->tree_node), &(global_ctrls)); + + return 0; +} + + +struct global_ctrl * get_global_ctrl(unsigned int cmd) { + struct rb_node * n = global_ctrls.rb_node; + struct global_ctrl * ctrl = NULL; + + while (n) { + ctrl = rb_entry(n, struct global_ctrl, tree_node); + + if (cmd < ctrl->cmd) { + n = n->rb_left; + } else if (cmd > ctrl->cmd) { + n = n->rb_right; + } else { + return ctrl; + } + } + + return NULL; +} + + + + + +/* */ +/* VM Controls */ +/* */ + struct vm_ext { struct linux_ext * impl; void * vm_data; @@ -86,6 +168,7 @@ int deinit_vm_extensions(struct v3_guest * guest) { return 0; } + int init_lnx_extensions( void ) { extern struct linux_ext * __start__lnx_exts[]; extern struct linux_ext * __stop__lnx_exts[]; @@ -116,6 +199,7 @@ int deinit_lnx_extensions( void ) { while (tmp_ext != __stop__lnx_exts[0]) { INFO("Cleaning up Linux Extension (%s)\n", tmp_ext->name); + if (tmp_ext->deinit != NULL) { tmp_ext->deinit(); } else { @@ -127,3 +211,4 @@ int deinit_lnx_extensions( void ) { return 0; } + diff --git a/linux_module/linux-exts.h b/linux_module/linux-exts.h index c5aeea1..289d68a 100644 --- a/linux_module/linux-exts.h +++ b/linux_module/linux-exts.h @@ -1,9 +1,7 @@ #include "palacios.h" -int add_mod_cmd(struct v3_guest * guest, unsigned int cmd, - int (*handler)(struct v3_guest * guest, - unsigned int cmd, unsigned long arg)); + struct linux_ext { @@ -26,6 +24,21 @@ void * get_vm_ext_data(struct v3_guest * guest, char * ext_name); +struct global_ctrl { + unsigned int cmd; + + int (*handler)(unsigned int cmd, unsigned long arg); + + struct rb_node tree_node; +}; + +int add_global_ctrl(unsigned int cmd, + int (*handler)(unsigned int cmd, unsigned long arg)); + +struct global_ctrl * get_global_ctrl(unsigned int cmd); + + + #define register_extension(ext) \ static struct linux_ext * _lnx_ext \ __attribute__((used)) \ diff --git a/linux_module/main.c b/linux_module/main.c index 74301e9..098d8f1 100644 --- a/linux_module/main.c +++ b/linux_module/main.c @@ -174,9 +174,18 @@ out_err: break; } - default: - ERROR("\tUnhandled\n"); + + default: { + struct global_ctrl * ctrl = get_global_ctrl(ioctl); + + if (ctrl) { + return ctrl->handler(ioctl, arg); + } + + WARNING("\tUnhandled global ctrl cmd: %d\n", ioctl); + return -EINVAL; + } } return 0; diff --git a/linux_module/palacios.h b/linux_module/palacios.h index fd5a188..0141e38 100644 --- a/linux_module/palacios.h +++ b/linux_module/palacios.h @@ -12,6 +12,8 @@ #define V3_FREE_GUEST 13 #define V3_ADD_MEMORY 50 +#define V3_ADD_PCI_HW_DEV 55 +#define V3_ADD_PCI_USER_DEV 56 /* VM Specific IOCTLs */ #define V3_VM_CONSOLE_CONNECT 20 @@ -69,6 +71,19 @@ struct v3_chkpt_info { } __attribute__((packed)); +struct v3_hw_pci_dev { + char name[128]; + unsigned int bus; + unsigned int dev; + unsigned int func; +} __attribute__((packed)); + +struct v3_user_pci_dev { + char name[128]; + unsigned short vendor_id; + unsigned short dev_id; +} __attribute__((packed)); + void * trace_malloc(size_t size, gfp_t flags); diff --git a/linux_usr/Makefile b/linux_usr/Makefile index 5da0c44..83246c2 100644 --- a/linux_usr/Makefile +++ b/linux_usr/Makefile @@ -30,6 +30,8 @@ BASE_EXECS = v3_mem \ v3_debug \ v3_send \ v3_receive \ + v3_pci + # # Examples diff --git a/linux_usr/v3_ctrl.h b/linux_usr/v3_ctrl.h index 6522b34..aae3af6 100644 --- a/linux_usr/v3_ctrl.h +++ b/linux_usr/v3_ctrl.h @@ -6,9 +6,16 @@ #ifndef _v3_ctrl_h #define _v3_ctrl_h + +/* Global Control IOCTLs */ #define V3_CREATE_GUEST 12 #define V3_FREE_GUEST 13 +#define V3_ADD_MEMORY 50 +#define V3_ADD_PCI_HW_DEV 55 +#define V3_ADD_PCI_USER_DEV 56 + +/* VM Specific IOCTLs */ /* VM Specific ioctls */ #define V3_VM_CONSOLE_CONNECT 20 @@ -24,7 +31,6 @@ #define V3_VM_INSPECT 30 #define V3_VM_DEBUG 31 -#define V3_ADD_MEMORY 50 #define V3_VM_MOVE_CORE 33 @@ -69,4 +75,14 @@ struct v3_chkpt_info { char url[256]; /* This might need to be bigger... */ } __attribute__((packed)); + + +struct v3_hw_pci_dev { + char url[128]; + unsigned int bus; + unsigned int dev; + unsigned int func; +} __attribute__((packed)); + + #endif diff --git a/linux_usr/v3_pci.c b/linux_usr/v3_pci.c new file mode 100644 index 0000000..5bbf951 --- /dev/null +++ b/linux_usr/v3_pci.c @@ -0,0 +1,58 @@ +/* Host PCI User space tool + * (c) Jack Lange, 2012 + * jacklange@cs.pitt.edu + */ +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "v3_ctrl.h" + + +int main(int argc, char ** argv) { + int v3_fd = 0; + struct v3_hw_pci_dev dev_info; + unsigned int bus = 0; + unsigned int dev = 0; + unsigned int func = 0; + int ret = 0; + + if (argc < 3) { + printf("Usage: ./v3_pci \n"); + return -1; + } + + bus = atoi(argv[2]); + dev = atoi(argv[3]); + func = atoi(argv[4]); + + strncpy(dev_info.url, argv[1], 128); + dev_info.bus = bus; + dev_info.dev = dev; + dev_info.func = func; + + + v3_fd = open("/dev/v3vee", O_RDONLY); + + if (v3_fd == -1) { + printf("Error opening V3Vee device file\n"); + return -1; + } + + + ret = ioctl(v3_fd, V3_ADD_PCI_HW_DEV, &dev_info); + + + if (ret < 0) { + printf("Error registering PCI device\n"); + return -1; + } + + close(v3_fd); +} diff --git a/palacios/include/interfaces/host_pci.h b/palacios/include/interfaces/host_pci.h new file mode 100644 index 0000000..e426533 --- /dev/null +++ b/palacios/include/interfaces/host_pci.h @@ -0,0 +1,132 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2012, Jack Lange + * Copyright (c) 2012, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +#include +#include + + +struct v3_vm_info; + +typedef enum { PT_BAR_NONE, + PT_BAR_IO, + PT_BAR_MEM32, + PT_BAR_MEM24, + PT_BAR_MEM64_LO, + PT_BAR_MEM64_HI, + PT_EXP_ROM } pt_bar_type_t; + + +typedef enum { HOST_PCI_CMD_DMA_DISABLE = 1, + HOST_PCI_CMD_DMA_ENABLE = 2, + HOST_PCI_CMD_INTX_DISABLE = 3, + HOST_PCI_CMD_INTX_ENABLE = 4, + HOST_PCI_CMD_MSI_DISABLE = 5, + HOST_PCI_CMD_MSI_ENABLE = 6, + HOST_PCI_CMD_MSIX_DISABLE = 7, + HOST_PCI_CMD_MSIX_ENABLE = 8 } host_pci_cmd_t; + +struct v3_host_pci_bar { + uint32_t size; + pt_bar_type_t type; + + /* We store 64 bit memory bar addresses in the high BAR + * because they are the last to be updated + * This means that the addr field must be 64 bits + */ + uint64_t addr; + + union { + uint32_t flags; + struct { + uint32_t prefetchable : 1; + uint32_t cacheable : 1; + uint32_t exp_rom_enabled : 1; + uint32_t rsvd : 29; + } __attribute__((packed)); + } __attribute__((packed)); + + +}; + + + +struct v3_host_pci_dev { + struct v3_host_pci_bar bars[6]; + struct v3_host_pci_bar exp_rom; + + uint8_t cfg_space[256]; + + enum {IOMMU, SYMBIOTIC, EMULATED} iface; + + int (*irq_handler)(void * guest_data, uint32_t vec_index); + + void * host_data; + void * guest_data; +}; + +// For now we just support the single contiguous region +// This can be updated in the future to support non-contiguous guests +struct v3_guest_mem_region { + uint64_t start; + uint64_t end; +}; + + +#ifdef __V3VEE__ + +#include + + +struct v3_host_pci_dev * v3_host_pci_get_dev(struct v3_vm_info * vm, char * url, void * priv_data); + + +int v3_host_pci_config_write(struct v3_host_pci_dev * v3_dev, uint32_t reg_num, void * src, uint32_t length); +int v3_host_pci_config_read(struct v3_host_pci_dev * v3_dev, uint32_t reg_num, void * dst, uint32_t length); + +int v3_host_pci_cmd_update(struct v3_host_pci_dev * v3_dev, pci_cmd_t cmd, uint64_t arg); + +int v3_host_pci_ack_irq(struct v3_host_pci_dev * v3_dev, uint32_t vector); + + +#endif + + +struct v3_host_pci_hooks { + struct v3_host_pci_dev * (*request_device)(char * url, void * v3_ctx); + + // emulated interface + + int (*config_write)(struct v3_host_pci_dev * v3_dev, uint32_t reg_num, void * src, uint32_t length); + int (*config_read)(struct v3_host_pci_dev * v3_dev, uint32_t reg_num, void * dst, uint32_t length); + + int (*pci_cmd)(struct v3_host_pci_dev * v3_dev, host_pci_cmd_t cmd, uint64_t arg); + + int (*ack_irq)(struct v3_host_pci_dev * v3_dev, uint32_t vector); + + +}; + + + +void V3_Init_Host_PCI(struct v3_host_pci_hooks * hooks); + +int V3_get_guest_mem_region(struct v3_vm_info * vm, struct v3_guest_mem_region * region); +int V3_host_pci_raise_irq(struct v3_host_pci_dev * v3_dev, uint32_t vec_index); + diff --git a/palacios/src/devices/Makefile b/palacios/src/devices/Makefile index 61d00c5..67ca1f9 100644 --- a/palacios/src/devices/Makefile +++ b/palacios/src/devices/Makefile @@ -20,6 +20,7 @@ obj-$(V3_CONFIG_VNET_NIC) += vnet_nic.o obj-$(V3_CONFIG_NVRAM) += nvram.o obj-$(V3_CONFIG_OS_DEBUG) += os_debug.o obj-$(V3_CONFIG_PCI) += pci.o +obj-$(V3_CONFIG_HOST_PCI) += host_pci.o obj-$(V3_CONFIG_PIIX3) += piix3.o obj-$(V3_CONFIG_SWAPBYPASS_DISK_CACHE) += swapbypass_cache.o obj-$(V3_CONFIG_SWAPBYPASS_DISK_CACHE2) += swapbypass_cache2.o @@ -45,6 +46,5 @@ obj-$(V3_CONFIG_CHAR_STREAM) += char_stream.o obj-$(V3_CONFIG_VGA) += vga.o -obj-$(V3_CONFIG_PCI_FRONT) += pci_front.o obj-$(V3_CONFIG_VNET_GUEST_IFACE) += vnet_guest_iface.o diff --git a/palacios/src/devices/host_pci.c b/palacios/src/devices/host_pci.c new file mode 100644 index 0000000..85e9cb3 --- /dev/null +++ b/palacios/src/devices/host_pci.c @@ -0,0 +1,548 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2012, Jack Lange + * Copyright (c) 2012, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +/* This is the generic passthrough PCI virtual device */ + +/* + * The basic idea is that we do not change the hardware PCI configuration + * Instead we modify the guest environment to map onto the physical configuration + * + * The pci subsystem handles most of the configuration space, except for the bar registers. + * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks + * + * Memory Bars are always remapped via the shadow map, + * IO Bars are selectively remapped through hooks if the guest changes them + */ + +#include +#include +#include +#include +#include // must include this to avoid dependency issue +#include + +#include +#include +#include + +#define PCI_BUS_MAX 7 +#define PCI_DEV_MAX 32 +#define PCI_FN_MAX 7 + +#define PCI_DEVICE 0x0 +#define PCI_PCI_BRIDGE 0x1 +#define PCI_CARDBUS_BRIDGE 0x2 + +#define PCI_HDR_SIZE 256 + + + + +struct host_pci_state { + // This holds the description of the host PCI device configuration + struct v3_host_pci_dev * host_dev; + + + struct v3_host_pci_bar virt_bars[6]; + struct v3_host_pci_bar virt_exp_rom; + + struct vm_device * pci_bus; + struct pci_device * pci_dev; + + char name[32]; +}; + + + +/* +static int pci_exp_rom_init(struct vm_device * dev, struct host_pci_state * state) { + struct pci_device * pci_dev = state->pci_dev; + struct v3_host_pci_bar * hrom = &(state->host_dev->exp_rom); + + + + PrintDebug("Adding 32 bit PCI mem region: start=%p, end=%p\n", + (void *)(addr_t)hrom->addr, + (void *)(addr_t)(hrom->addr + hrom->size)); + + if (hrom->exp_rom_enabled) { + // only map shadow memory if the ROM is enabled + + v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, + hrom->addr, + hrom->addr + hrom->size - 1, + hrom->addr); + + // Initially the virtual location matches the physical ones + memcpy(&(state->virt_exp_rom), hrom, sizeof(struct v3_host_pci_bar)); + + + PrintDebug("phys exp_rom: addr=%p, size=%u\n", + (void *)(addr_t)hrom->addr, + hrom->size); + + + // Update the pci subsystem versions + pci_dev->config_header.expansion_rom_address = PCI_EXP_ROM_VAL(hrom->addr, hrom->exp_rom_enabled); + } + + + + return 0; +} +*/ + + +static int pt_io_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * priv_data) { + struct v3_host_pci_bar * pbar = (struct v3_host_pci_bar *)priv_data; + int port_offset = port % pbar->size; + + if (length == 1) { + *(uint8_t *)dst = v3_inb(pbar->addr + port_offset); + } else if (length == 2) { + *(uint16_t *)dst = v3_inw(pbar->addr + port_offset); + } else if (length == 4) { + *(uint32_t *)dst = v3_indw(pbar->addr + port_offset); + } else { + PrintError("Invalid PCI passthrough IO Redirection size read\n"); + return -1; + } + + return length; +} + + +static int pt_io_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { + struct v3_host_pci_bar * pbar = (struct v3_host_pci_bar *)priv_data; + int port_offset = port % pbar->size; + + if (length == 1) { + v3_outb(pbar->addr + port_offset, *(uint8_t *)src); + } else if (length == 2) { + v3_outw(pbar->addr + port_offset, *(uint16_t *)src); + } else if (length == 4) { + v3_outdw(pbar->addr + port_offset, *(uint32_t *)src); + } else { + PrintError("Invalid PCI passthrough IO Redirection size write\n"); + return -1; + } + + return length; + +} + + + +static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + struct v3_host_pci_bar * hbar = &(state->host_dev->bars[bar_num]); + uint32_t bar_val = 0; + + if (hbar->type == PT_BAR_IO) { + int i = 0; + + bar_val = PCI_IO_BAR_VAL(hbar->addr); + + for (i = 0; i < hbar->size; i++) { + v3_hook_io_port(dev->vm, hbar->addr + i, NULL, NULL, NULL); + } + } else if (hbar->type == PT_BAR_MEM32) { + bar_val = PCI_MEM32_BAR_VAL(hbar->addr, hbar->prefetchable); + + v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, + hbar->addr, hbar->addr + hbar->size - 1, + hbar->addr); + + } else if (hbar->type == PT_BAR_MEM24) { + bar_val = PCI_MEM24_BAR_VAL(hbar->addr, hbar->prefetchable); + + v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, + hbar->addr, hbar->addr + hbar->size - 1, + hbar->addr); + } else if (hbar->type == PT_BAR_MEM64_LO) { + PrintError("Don't currently handle 64 bit bars...\n"); + } else if (hbar->type == PT_BAR_MEM64_HI) { + PrintError("Don't currently handle 64 bit bars...\n"); + } + + + memcpy(&(state->virt_bars[bar_num]), hbar, sizeof(struct v3_host_pci_bar)); + + *dst = bar_val; + + return 0; +} + + + +static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + + struct v3_host_pci_bar * hbar = &(state->host_dev->bars[bar_num]); + struct v3_host_pci_bar * vbar = &(state->virt_bars[bar_num]); + + + + if (vbar->type == PT_BAR_NONE) { + return 0; + } else if (vbar->type == PT_BAR_IO) { + int i = 0; + + // unhook old ports + for (i = 0; i < vbar->size; i++) { + if (v3_unhook_io_port(dev->vm, vbar->addr + i) == -1) { + PrintError("Could not unhook previously hooked port.... %d (0x%x)\n", + (uint32_t)vbar->addr + i, (uint32_t)vbar->addr + i); + return -1; + } + } + + // clear the low bits to match the size + vbar->addr = *src & ~(hbar->size - 1); + + // udpate source version + *src = PCI_IO_BAR_VAL(vbar->addr); + + PrintDebug("Rehooking passthrough IO ports starting at %d (0x%x)\n", + (uint32_t)vbar->addr, (uint32_t)vbar->addr); + + if (vbar->addr == hbar->addr) { + // Map the io ports as passthrough + for (i = 0; i < hbar->size; i++) { + v3_hook_io_port(dev->vm, hbar->addr + i, NULL, NULL, NULL); + } + } else { + // We have to manually handle the io redirection + for (i = 0; i < vbar->size; i++) { + v3_hook_io_port(dev->vm, vbar->addr + i, pt_io_read, pt_io_write, hbar); + } + } + } else if (vbar->type == PT_BAR_MEM32) { + // remove old mapping + struct v3_mem_region * old_reg = v3_get_mem_region(dev->vm, V3_MEM_CORE_ANY, vbar->addr); + + if (old_reg == NULL) { + // uh oh... + PrintError("Could not find PCI Passthrough memory redirection region (addr=0x%x)\n", (uint32_t)vbar->addr); + return -1; + } + + v3_delete_mem_region(dev->vm, old_reg); + + // clear the low bits to match the size + vbar->addr = *src & ~(hbar->size - 1); + + // Set reserved bits + *src = PCI_MEM32_BAR_VAL(vbar->addr, hbar->prefetchable); + + PrintDebug("Adding pci Passthrough remapping: start=0x%x, size=%d, end=0x%x (hpa=%p)\n", + (uint32_t)vbar->addr, vbar->size, (uint32_t)vbar->addr + vbar->size, (void *)hbar->addr); + + v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, + vbar->addr, + vbar->addr + vbar->size - 1, + hbar->addr); + + } else if (vbar->type == PT_BAR_MEM64_LO) { + // We only store the written values here, the actual reconfig comes when the high BAR is updated + + vbar->addr = *src & ~(hbar->size - 1); + + *src = PCI_MEM64_LO_BAR_VAL(vbar->addr, hbar->prefetchable); + + + } else if (vbar->type == PT_BAR_MEM64_HI) { + struct v3_host_pci_bar * lo_vbar = &(state->virt_bars[bar_num - 1]); + struct v3_mem_region * old_reg = v3_get_mem_region(dev->vm, V3_MEM_CORE_ANY, vbar->addr); + + if (old_reg == NULL) { + // uh oh... + PrintError("Could not find PCI Passthrough memory redirection region (addr=%p)\n", + (void *)(addr_t)vbar->addr); + return -1; + } + + // remove old mapping + v3_delete_mem_region(dev->vm, old_reg); + + vbar->addr = (((uint64_t)*src) << 32) + lo_vbar->addr; + + // We don't set size, because we assume region is less than 4GB + // src does not change, because there are no reserved bits + + + PrintDebug("Adding pci Passthrough remapping: start=%p, size=%p, end=%p\n", + (void *)(addr_t)vbar->addr, (void *)(addr_t)vbar->size, + (void *)(addr_t)(vbar->addr + vbar->size)); + + if (v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, vbar->addr, + vbar->addr + vbar->size - 1, hbar->addr) == -1) { + + PrintDebug("Fail to insert shadow region (%p, %p) -> %p\n", + (void *)(addr_t)vbar->addr, + (void *)(addr_t)(vbar->addr + vbar->size - 1), + (void *)(addr_t)hbar->addr); + return -1; + } + + } else { + PrintError("Unhandled Pasthrough PCI Bar type %d\n", vbar->type); + return -1; + } + + + return 0; +} + + +static int pt_config_write(struct pci_device * pci_dev, uint32_t reg_num, void * src, uint_t length, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + +// V3_Print("Writing host PCI config space update\n"); + + // We will mask all operations to the config header itself, + // and only allow direct access to the device specific config space + if (reg_num < 64) { + return 0; + } + + return v3_host_pci_config_write(state->host_dev, reg_num, src, length); +} + + + +static int pt_config_read(struct pci_device * pci_dev, uint32_t reg_num, void * dst, uint_t length, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + + // V3_Print("Reading host PCI config space update\n"); + + return v3_host_pci_config_read(state->host_dev, reg_num, dst, length); +} + + + + +/* This is really iffy.... + * It was totally broken before, but it's _not_ totally fixed now + * The Expansion rom can be enabled/disabled via software using the low order bit + * We should probably handle that somehow here... + */ +static int pt_exp_rom_write(struct pci_device * pci_dev, uint32_t * src, void * priv_data) { + struct vm_device * dev = (struct vm_device *)(priv_data); + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + + struct v3_host_pci_bar * hrom = &(state->host_dev->exp_rom); + struct v3_host_pci_bar * vrom = &(state->virt_exp_rom); + + PrintDebug("exp_rom update: src=0x%x\n", *src); + PrintDebug("vrom is size=%u, addr=0x%x\n", vrom->size, (uint32_t)vrom->addr); + PrintDebug("hrom is size=%u, addr=0x%x\n", hrom->size, (uint32_t)hrom->addr); + + if (hrom->exp_rom_enabled) { + // only remove old mapping if present, I.E. if the rom was enabled previously + if (vrom->exp_rom_enabled) { + struct v3_mem_region * old_reg = v3_get_mem_region(dev->vm, V3_MEM_CORE_ANY, vrom->addr); + + if (old_reg == NULL) { + // uh oh... + PrintError("Could not find PCI Passthrough exp_rom_base redirection region (addr=0x%x)\n", (uint32_t)vrom->addr); + return -1; + } + + v3_delete_mem_region(dev->vm, old_reg); + } + + + vrom->addr = *src & ~(hrom->size - 1); + + // Set flags in actual register value + *src = PCI_EXP_ROM_VAL(vrom->addr, (*src & 0x00000001)); + + PrintDebug("Cooked src=0x%x\n", *src); + + + PrintDebug("Adding pci Passthrough exp_rom_base remapping: start=0x%x, size=%u, end=0x%x\n", + (uint32_t)vrom->addr, vrom->size, (uint32_t)vrom->addr + vrom->size); + + if (v3_add_shadow_mem(dev->vm, V3_MEM_CORE_ANY, vrom->addr, + vrom->addr + vrom->size - 1, hrom->addr) == -1) { + PrintError("Failed to remap pci exp_rom: start=0x%x, size=%u, end=0x%x\n", + (uint32_t)vrom->addr, vrom->size, (uint32_t)vrom->addr + vrom->size); + return -1; + } + } + + return 0; +} + + +static int pt_cmd_update(struct pci_device * pci, pci_cmd_t cmd, uint64_t arg, void * priv_data) { + struct vm_device * dev = (struct vm_device *)(priv_data); + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + + V3_Print("Host PCI Device: CMD update (%d)(arg=%llu)\n", cmd, arg); + + v3_host_pci_cmd_update(state->host_dev, cmd, arg); + + return 0; +} + + +static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev) { + struct host_pci_state * state = (struct host_pci_state *)dev->private_data; + struct pci_device * pci_dev = NULL; + struct v3_pci_bar bars[6]; + int bus_num = 0; + int i; + + for (i = 0; i < 6; i++) { + bars[i].type = PCI_BAR_PASSTHROUGH; + bars[i].private_data = dev; + bars[i].bar_init = pci_bar_init; + bars[i].bar_write = pci_bar_write; + } + + pci_dev = v3_pci_register_device(state->pci_bus, + PCI_STD_DEVICE, + bus_num, -1, 0, + state->name, bars, + pt_config_write, + pt_config_read, + pt_cmd_update, + pt_exp_rom_write, + dev); + + + state->pci_dev = pci_dev; + + // pci_exp_rom_init(dev, state); + pci_dev->config_header.expansion_rom_address = 0; + + v3_pci_enable_capability(pci_dev, PCI_CAP_MSI); +// v3_pci_enable_capability(pci_dev, PCI_CAP_MSIX); + v3_pci_enable_capability(pci_dev, PCI_CAP_PCIE); + v3_pci_enable_capability(pci_dev, PCI_CAP_PM); + + + + if (state->host_dev->iface == SYMBIOTIC) { +#ifdef V3_CONFIG_SYMBIOTIC + v3_sym_map_pci_passthrough(vm_info, pci_dev->bus_num, pci_dev->dev_num, pci_dev->fn_num); +#else + PrintError("ERROR Symbiotic Passthrough is not enabled\n"); + return -1; +#endif + } + + return 0; +} + + +static struct v3_device_ops dev_ops = { + .free = NULL, +}; + + +static int irq_ack(struct guest_info * core, uint32_t irq, void * private_data) { + struct host_pci_state * state = (struct host_pci_state *)private_data; + + + // V3_Print("Acking IRQ %d\n", irq); + v3_host_pci_ack_irq(state->host_dev, irq); + + return 0; +} + + +static int irq_handler(void * private_data, uint32_t vec_index) { + struct host_pci_state * state = (struct host_pci_state *)private_data; + struct v3_irq vec; + + vec.irq = vec_index; + vec.ack = irq_ack; + vec.private_data = state; + + + // V3_Print("Raising host PCI IRQ %d\n", vec_index); + + if (state->pci_dev->irq_type == IRQ_NONE) { + return 0; + } else if (state->pci_dev->irq_type == IRQ_INTX) { + v3_pci_raise_acked_irq(state->pci_bus, state->pci_dev, vec); + } else { + v3_pci_raise_irq(state->pci_bus, state->pci_dev, vec_index); + } + + return 0; +} + + +static int host_pci_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { + struct host_pci_state * state = V3_Malloc(sizeof(struct host_pci_state)); + struct vm_device * dev = NULL; + struct vm_device * pci = v3_find_dev(vm, v3_cfg_val(cfg, "bus")); + char * dev_id = v3_cfg_val(cfg, "ID"); + char * url = v3_cfg_val(cfg, "url"); + + memset(state, 0, sizeof(struct host_pci_state)); + + if (!pci) { + PrintError("PCI bus not specified in config file\n"); + return -1; + } + + state->pci_bus = pci; + strncpy(state->name, dev_id, 32); + + + dev = v3_add_device(vm, dev_id, &dev_ops, state); + + if (dev == NULL) { + PrintError("Could not attach device %s\n", dev_id); + V3_Free(state); + return -1; + } + + state->host_dev = v3_host_pci_get_dev(vm, url, state); + + if (state->host_dev == NULL) { + PrintError("Could not connect to host pci device (%s)\n", url); + return -1; + } + + + state->host_dev->irq_handler = irq_handler; + + if (setup_virt_pci_dev(vm, dev) == -1) { + PrintError("Could not setup virtual host PCI device\n"); + return -1; + } + + return 0; +} + + + + +device_register("HOST_PCI", host_pci_init) diff --git a/palacios/src/interfaces/Kconfig b/palacios/src/interfaces/Kconfig index d2962a6..6553f40 100644 --- a/palacios/src/interfaces/Kconfig +++ b/palacios/src/interfaces/Kconfig @@ -60,6 +60,13 @@ config PACKET to support the internal networking features of Palacios. +config HOST_PCI + bool "Host PCI Support" + depends on EXPERIMENTAL + default y + help + This enables host support for passthrough PCI devices + config HOST_DEVICE bool "Host device support" default n diff --git a/palacios/src/interfaces/Makefile b/palacios/src/interfaces/Makefile index 3c340de..c04193f 100644 --- a/palacios/src/interfaces/Makefile +++ b/palacios/src/interfaces/Makefile @@ -7,6 +7,7 @@ obj-$(V3_CONFIG_GRAPHICS_CONSOLE) += vmm_graphics_console.o obj-$(V3_CONFIG_KEYED_STREAMS) += vmm_keyed_stream.o obj-$(V3_CONFIG_HOST_DEVICE) += vmm_host_dev.o obj-$(V3_CONFIG_HOST_HYPERCALL) += vmm_host_hypercall.o +obj-$(V3_CONFIG_HOST_PCI) += host_pci.o obj-y += null.o diff --git a/palacios/src/interfaces/host_pci.c b/palacios/src/interfaces/host_pci.c new file mode 100644 index 0000000..e945b92 --- /dev/null +++ b/palacios/src/interfaces/host_pci.c @@ -0,0 +1,138 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2012, Jack Lange + * Copyright (c) 2012, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +#include +#include +#include +#include + + + +static struct v3_host_pci_hooks * pci_hooks = NULL; + + + +void V3_Init_Host_PCI(struct v3_host_pci_hooks * hooks) { + pci_hooks = hooks; + V3_Print("V3 host PCI interface intialized\n"); + return; +} + + +/* This is ugly and should be abstracted out to a function in the memory manager */ +int V3_get_guest_mem_region(struct v3_vm_info * vm, struct v3_guest_mem_region * region) { + + if (!vm) { + PrintError("Tried to get a nenregion from a NULL vm pointer\n"); + return -1; + } + + + region->start = vm->mem_map.base_region.host_addr; + region->end = vm->mem_map.base_region.host_addr + (vm->mem_map.base_region.guest_end - vm->mem_map.base_region.guest_start); + + return 0; +} + + +struct v3_host_pci_dev * v3_host_pci_get_dev(struct v3_vm_info * vm, + char * url, void * priv_data) { + + struct v3_host_pci_dev * host_dev = NULL; + + if ((!pci_hooks) || (!pci_hooks->request_device)) { + PrintError("Host PCI Hooks not initialized\n"); + return NULL; + } + + host_dev = pci_hooks->request_device(url, vm); + + if (host_dev == NULL) { + PrintError("Could not find host PCI device (%s)\n", url); + return NULL; + } + + host_dev->guest_data = priv_data; + + return host_dev; + +} + + +int v3_host_pci_config_write(struct v3_host_pci_dev * v3_dev, + uint32_t reg_num, void * src, + uint32_t length) { + + if ((!pci_hooks) || (!pci_hooks->config_write)) { + PrintError("Host PCI hooks not initialized\n"); + return -1; + } + + return pci_hooks->config_write(v3_dev, reg_num, src, length); +} + + +int v3_host_pci_config_read(struct v3_host_pci_dev * v3_dev, + uint32_t reg_num, void * dst, + uint32_t length) { + + if ((!pci_hooks) || (!pci_hooks->config_read)) { + PrintError("Host PCI hooks not initialized\n"); + return -1; + } + + return pci_hooks->config_read(v3_dev, reg_num, dst, length); +} + +int v3_host_pci_ack_irq(struct v3_host_pci_dev * v3_dev, uint32_t vec_index) { + + if ((!pci_hooks) || (!pci_hooks->ack_irq)) { + PrintError("Host PCI hooks not initialized\n"); + return -1; + } + + return pci_hooks->ack_irq(v3_dev, vec_index); +} + + + +int v3_host_pci_cmd_update(struct v3_host_pci_dev * v3_dev, pci_cmd_t cmd, uint64_t arg ) { + + if ((!pci_hooks) || (!pci_hooks->pci_cmd)) { + PrintError("Host PCI hooks not initialized\n"); + return -1; + } + + return pci_hooks->pci_cmd(v3_dev, cmd, arg); +} + + + + + +int V3_host_pci_raise_irq(struct v3_host_pci_dev * v3_dev, uint32_t vec_index) { + if (!v3_dev->irq_handler) { + PrintError("No interrupt registerd for host pci device\n"); + return -1; + } + + return v3_dev->irq_handler(v3_dev->guest_data, vec_index); +} +