1 /* Linux host side PCI passthrough support
2 * Jack Lange <jacklange@cs.pitt.edu>, 2012
6 #include <linux/iommu.h>
7 #include <linux/interrupt.h>
8 #include <linux/version.h>
11 #define PCI_HDR_SIZE 256
14 static int setup_hw_pci_dev(struct host_pci_device * host_dev) {
16 struct pci_dev * dev = NULL;
17 struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev);
19 dev = pci_get_bus_and_slot(host_dev->hw_dev.bus,
20 host_dev->hw_dev.devfn);
24 printk("Could not find HW pci device (bus=%d, devfn=%d)\n",
25 host_dev->hw_dev.bus, host_dev->hw_dev.devfn);
29 // record pointer in dev state
30 host_dev->hw_dev.dev = dev;
32 host_dev->hw_dev.intx_disabled = 1;
33 spin_lock_init(&(host_dev->hw_dev.intx_lock));
35 if (pci_enable_device(dev)) {
36 printk("Could not enable Device\n");
40 ret = pci_request_regions(dev, "v3vee");
42 printk("Could not reservce PCI regions\n");
47 pci_reset_function(host_dev->hw_dev.dev);
48 pci_save_state(host_dev->hw_dev.dev);
53 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
54 printk("Resource %d\n", i);
55 printk("\tflags = 0x%lx\n", pci_resource_flags(dev, i));
56 printk("\t name=%s, start=%lx, size=%d\n",
57 host_dev->hw_dev.dev->resource[i].name, (uintptr_t)pci_resource_start(dev, i),
58 (u32)pci_resource_len(dev, i));
62 printk("Rom BAR=%d\n", dev->rom_base_reg);
65 /* Cache first 6 BAR regs */
69 for (i = 0; i < 6; i++) {
70 struct v3_host_pci_bar * bar = &(v3_dev->bars[i]);
73 bar->size = pci_resource_len(dev, i);
74 bar->addr = pci_resource_start(dev, i);
75 flags = pci_resource_flags(dev, i);
77 if (flags & IORESOURCE_IO) {
78 bar->type = PT_BAR_IO;
79 } else if (flags & IORESOURCE_MEM) {
80 if (flags & IORESOURCE_MEM_64) {
81 printk("ERROR: 64 Bit BARS not yet supported\n");
82 bar->type = PT_BAR_NONE;
83 } else if (flags & IORESOURCE_DMA) {
84 bar->type = PT_BAR_MEM24;
86 bar->type = PT_BAR_MEM32;
89 bar->cacheable = ((flags & IORESOURCE_CACHEABLE) != 0);
90 bar->prefetchable = ((flags & IORESOURCE_PREFETCH) != 0);
93 bar->type = PT_BAR_NONE;
98 /* Cache expansion rom bar */
100 struct resource * rom_res = &(dev->resource[PCI_ROM_RESOURCE]);
101 int rom_size = pci_resource_len(dev, PCI_ROM_RESOURCE);
106 v3_dev->exp_rom.size = rom_size;
107 v3_dev->exp_rom.addr = pci_resource_start(dev, PCI_ROM_RESOURCE);
108 flags = pci_resource_flags(dev, PCI_ROM_RESOURCE);
110 v3_dev->exp_rom.type = PT_EXP_ROM;
112 v3_dev->exp_rom.exp_rom_enabled = rom_res->flags & IORESOURCE_ROM_ENABLE;
116 /* Cache entire configuration space */
120 // Copy the configuration space to the local cached version
121 for (m = 0; m < PCI_HDR_SIZE; m += 4) {
122 pci_read_config_dword(dev, m, (u32 *)&(v3_dev->cfg_space[m]));
127 /* HARDCODED for now but this will need to depend on IOMMU support detection */
129 printk("Setting host PCI device (%s) as IOMMU\n", host_dev->name);
130 v3_dev->iface = IOMMU;
132 printk("Setting host PCI device (%s) as SYMBIOTIC\n", host_dev->name);
133 v3_dev->iface = SYMBIOTIC;
/* Threaded handler for legacy (INTx) interrupts on the passthrough device.
 * Masks the host IRQ line, records that state, and forwards the interrupt
 * to the guest as vector 0. The line stays disabled until the guest acks
 * via hw_ack_irq() — necessary because level-triggered INTx would re-fire
 * continuously until the device is actually serviced by the guest. */
static irqreturn_t host_pci_intx_irq_handler(int irq, void * priv_data) {
    struct host_pci_device * host_dev = priv_data;

    // printk("Host PCI IRQ handler (%d)\n", irq);

    /* intx_lock serializes the disable/enable toggle with hw_ack_irq(). */
    spin_lock(&(host_dev->hw_dev.intx_lock));
    disable_irq_nosync(irq);   /* nosync: we are in the IRQ path for this line */
    host_dev->hw_dev.intx_disabled = 1;
    spin_unlock(&(host_dev->hw_dev.intx_lock));

    /* Raise vector 0 in the guest's virtual view of this device. */
    V3_host_pci_raise_irq(&(host_dev->v3_dev), 0);
/* Handler for MSI interrupts: MSIs are edge-triggered, so no masking is
 * needed — simply forward the interrupt to the guest as vector 0. */
static irqreturn_t host_pci_msi_irq_handler(int irq, void * priv_data) {
    struct host_pci_device * host_dev = priv_data;
    // printk("Host PCI MSI IRQ Handler (%d)\n", irq);

    V3_host_pci_raise_irq(&(host_dev->v3_dev), 0);
/* Handler for MSI-X interrupts: scans the msix_entries table (filled in by
 * hw_pci_cmd's MSIX_ENABLE path) to map the host IRQ number back to its
 * MSI-X vector index, then raises that same index in the guest. */
static irqreturn_t host_pci_msix_irq_handler(int irq, void * priv_data) {
    struct host_pci_device * host_dev = priv_data;

    // printk("Host PCI MSIX IRQ Handler (%d)\n", irq);

    /* Linear scan is fine here: num_msix_vecs is small. */
    for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) {
	if (irq == host_dev->hw_dev.msix_entries[i].vector) {
	    V3_host_pci_raise_irq(&(host_dev->v3_dev), i);
	    /* No matching vector: interrupt is dropped (error branch below
	       appears after the loop in the elided original). */
	    printk("Error Could not find matching MSIX vector for IRQ %d\n", irq);
186 static int hw_pci_cmd(struct host_pci_device * host_dev, host_pci_cmd_t cmd, u64 arg) {
187 //struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev);
188 struct pci_dev * dev = host_dev->hw_dev.dev;
191 case HOST_PCI_CMD_DMA_DISABLE:
192 printk("Passthrough PCI device disabling BMDMA\n");
193 pci_clear_master(host_dev->hw_dev.dev);
195 case HOST_PCI_CMD_DMA_ENABLE:
196 printk("Passthrough PCI device Enabling BMDMA\n");
197 pci_set_master(host_dev->hw_dev.dev);
200 case HOST_PCI_CMD_INTX_DISABLE:
201 printk("Passthrough PCI device disabling INTx IRQ\n");
203 disable_irq(dev->irq);
204 free_irq(dev->irq, (void *)host_dev);
207 case HOST_PCI_CMD_INTX_ENABLE:
208 printk("Passthrough PCI device Enabling INTx IRQ\n");
210 if (request_threaded_irq(dev->irq, NULL, host_pci_intx_irq_handler,
211 IRQF_ONESHOT, "V3Vee_Host_PCI_INTx", (void *)host_dev)) {
212 printk("ERROR Could not assign IRQ to host PCI device (%s)\n", host_dev->name);
217 case HOST_PCI_CMD_MSI_DISABLE:
218 printk("Passthrough PCI device Disabling MSIs\n");
220 disable_irq(dev->irq);
221 free_irq(dev->irq, (void *)host_dev);
223 pci_disable_msi(dev);
226 case HOST_PCI_CMD_MSI_ENABLE:
227 printk("Passthrough PCI device Enabling MSI\n");
229 if (!dev->msi_enabled) {
232 if (request_irq(dev->irq, host_pci_msi_irq_handler,
233 0, "V3Vee_host_PCI_MSI", (void *)host_dev)) {
234 printk("Error Requesting IRQ %d for Passthrough MSI IRQ\n", dev->irq);
242 case HOST_PCI_CMD_MSIX_ENABLE: {
245 printk("Passthrough PCI device Enabling MSIX\n");
246 host_dev->hw_dev.num_msix_vecs = arg;;
247 host_dev->hw_dev.msix_entries = kcalloc(host_dev->hw_dev.num_msix_vecs,
248 sizeof(struct msix_entry), GFP_KERNEL);
250 for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) {
251 host_dev->hw_dev.msix_entries[i].entry = i;
254 pci_enable_msix(dev, host_dev->hw_dev.msix_entries,
255 host_dev->hw_dev.num_msix_vecs);
257 for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) {
258 if (request_irq(host_dev->hw_dev.msix_entries[i].vector,
259 host_pci_msix_irq_handler,
260 0, "V3VEE_host_PCI_MSIX", (void *)host_dev)) {
261 printk("Error requesting IRQ %d for Passthrough MSIX IRQ\n",
262 host_dev->hw_dev.msix_entries[i].vector);
269 case HOST_PCI_CMD_MSIX_DISABLE: {
272 printk("Passthrough PCI device Disabling MSIX\n");
274 for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) {
275 disable_irq(host_dev->hw_dev.msix_entries[i].vector);
278 for (i = 0; i < host_dev->hw_dev.num_msix_vecs; i++) {
279 free_irq(host_dev->hw_dev.msix_entries[i].vector, (void *)host_dev);
282 host_dev->hw_dev.num_msix_vecs = 0;
283 kfree(host_dev->hw_dev.msix_entries);
285 pci_disable_msix(dev);
290 printk("Error: unhandled passthrough PCI command: %d\n", cmd);
/* Guest-side acknowledgement of a forwarded INTx interrupt: re-enables the
 * host IRQ line that host_pci_intx_irq_handler() masked, allowing the next
 * device interrupt to be delivered. `vector` is currently unused in the
 * visible path (INTx is always vector 0). */
static int hw_ack_irq(struct host_pci_device * host_dev, u32 vector) {
    struct pci_dev * dev = host_dev->hw_dev.dev;

    // printk("Acking IRQ vector %d\n", vector);

    /* irqsave variant: may be called from contexts with IRQs enabled,
       while the handler takes the same lock from IRQ context. */
    spin_lock_irqsave(&(host_dev->hw_dev.intx_lock), flags);
    // printk("Enabling IRQ %d\n", dev->irq);
    enable_irq(dev->irq);
    host_dev->hw_dev.intx_disabled = 0;
    spin_unlock_irqrestore(&(host_dev->hw_dev.intx_lock), flags);
317 static int reserve_hw_pci_dev(struct host_pci_device * host_dev, void * v3_ctx) {
320 struct v3_host_pci_dev * v3_dev = &(host_dev->v3_dev);
321 struct pci_dev * dev = host_dev->hw_dev.dev;
323 spin_lock_irqsave(&lock, flags);
324 if (host_dev->hw_dev.in_use == 0) {
325 host_dev->hw_dev.in_use = 1;
329 spin_unlock_irqrestore(&lock, flags);
332 if (v3_dev->iface == IOMMU) {
333 struct v3_guest_mem_region region;
336 host_dev->hw_dev.iommu_domain = iommu_domain_alloc();
338 if (V3_get_guest_mem_region(v3_ctx, ®ion) == -1) {
339 printk("Error getting VM memory region for IOMMU support\n");
343 printk("Memory region: start=%p, end=%p\n", (void *)region.start, (void *)region.end);
346 flags = IOMMU_READ | IOMMU_WRITE; // Need to see what IOMMU_CACHE means
348 /* This version could be wrong */
349 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
350 // Guest VAs start at zero and go to end of memory
351 iommu_map_range(host_dev->hw_dev.iommu_domain, 0, region.start, (region.end - region.start), flags);
353 /* Linux actually made the interface worse... Now you can only map memory in powers of 2 (meant to only be pages...) */
355 u64 size = region.end - region.start;
356 u32 page_size = 512 * 4096; // assume large 64bit pages (2MB)
357 u64 dpa = 0; // same as gpa
358 u64 hpa = region.start;
361 if (size < page_size) {
362 page_size = 4096; // less than a 2MB granularity, so we switch to small pages (4KB)
365 printk("Mapping IOMMU region dpa=%p hpa=%p (size=%d)\n", (void *)dpa, (void *)hpa, page_size);
367 if (iommu_map(host_dev->hw_dev.iommu_domain, dpa, hpa,
368 get_order(page_size), flags)) {
369 printk("ERROR: Could not map sub region (DPA=%p) (HPA=%p) (order=%d)\n",
370 (void *)dpa, (void *)hpa, get_order(page_size));
382 if (iommu_attach_device(host_dev->hw_dev.iommu_domain, &(dev->dev))) {
383 printk("ERROR attaching host PCI device to IOMMU domain\n");
389 printk("Requesting Threaded IRQ handler for IRQ %d\n", dev->irq);
390 // setup regular IRQs until advanced IRQ mechanisms are enabled
391 if (request_threaded_irq(dev->irq, NULL, host_pci_intx_irq_handler,
392 IRQF_ONESHOT, "V3Vee_Host_PCI_INTx", (void *)host_dev)) {
393 printk("ERROR Could not assign IRQ to host PCI device (%s)\n", host_dev->name);
/* Forward a guest config-space write of `length` bytes (1, 2, or 4) at
 * offset `reg` to the physical device's configuration space. Any other
 * length is rejected with a log message.
 * (The leading `if (length == 1)` branch is elided from this view.) */
static int write_hw_pci_config(struct host_pci_device * host_dev, u32 reg, void * data, u32 length) {
    struct pci_dev * dev = host_dev->hw_dev.dev;

	pci_write_config_byte(dev, reg, *(u8 *)data);
    } else if (length == 2) {
	pci_write_config_word(dev, reg, *(u16 *)data);
    } else if (length == 4) {
	pci_write_config_dword(dev, reg, *(u32 *)data);
	/* Unsupported access width — write is dropped. */
	printk("Invalid length of host PCI config update\n");
/* Satisfy a guest config-space read of `length` bytes (1, 2, or 4) at
 * offset `reg` from the physical device, storing the result into `data`
 * (caller must supply a buffer of at least `length` bytes).
 * (The leading `if (length == 1)` branch is elided from this view.) */
static int read_hw_pci_config(struct host_pci_device * host_dev, u32 reg, void * data, u32 length) {
    struct pci_dev * dev = host_dev->hw_dev.dev;

	pci_read_config_byte(dev, reg, data);
    } else if (length == 2) {
	pci_read_config_word(dev, reg, data);
    } else if (length == 4) {
	pci_read_config_dword(dev, reg, data);
	/* Unsupported access width — `data` is left untouched. */
	printk("Invalid length of host PCI config read\n");