From 50b2b540c00941ced618df9deafccc30826b13b5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 14 Mar 2017 15:21:58 +0200 Subject: PCI/PM: Don't sleep at all when d3_delay or d3cold_delay is zero msleep() still sleeps 1 jiffy even when told to sleep for zero milliseconds. That can end up being 1-2 milliseconds or more. In the cases of d3_delay and d3cold_delay, that unnecessarily increases suspend and/or resume latencies. Do not sleep at all for the respective cases if d3_delay is zero or d3cold_delay is zero. Signed-off-by: Adrian Hunter Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- drivers/pci/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..9779483f81fd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -66,7 +66,8 @@ static void pci_dev_d3_sleep(struct pci_dev *dev) if (delay < pci_pm_d3_delay) delay = pci_pm_d3_delay; - msleep(delay); + if (delay) + msleep(delay); } #ifdef CONFIG_PCI_DOMAINS @@ -827,7 +828,8 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state) * because have already delayed for the bridge. */ if (dev->runtime_d3cold) { - msleep(dev->d3cold_delay); + if (dev->d3cold_delay) + msleep(dev->d3cold_delay); /* * When powering on a bridge from D3cold, the * whole hierarchy may be powered on into -- cgit v1.2.3 From fe2bd75b2290bf6acd4b91312697e445668dd773 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 29 Mar 2017 22:49:17 -0500 Subject: PCI: Short-circuit pci_device_is_present() for disconnected devices If the PCI device is disconnected, return false immediately from pci_device_is_present(). pci_device_is_present() uses the bus accessors, so the early return in the device accessors doesn't help here. Tested-by: Krishna Dhulipala Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Wei Zhang --- drivers/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..8ab0a0d3cddb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4932,6 +4932,8 @@ bool pci_device_is_present(struct pci_dev *pdev) { u32 v; + if (pci_dev_is_disconnected(pdev)) + return false; return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); } EXPORT_SYMBOL_GPL(pci_device_is_present); -- cgit v1.2.3 From f65fd1aa4f9881d5540192d11f7b8ed2fec936db Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 3 Apr 2017 16:02:50 -0500 Subject: PCI: Avoid FLR for Intel 82579 NICs Per Intel Specification Update 335553-002 (see link below), some 82579 network adapters advertise a Function Level Reset (FLR) capability, but they can hang when an FLR is triggered. To reproduce the problem, attach the device to a VM, then detach and try to attach again. Add a quirk to prevent the use of FLR on these devices. [bhelgaas: changelog, comments] Link: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/82579lm-82579v-gigabit-network-connection-spec-update.pdf Signed-off-by: Sasha Neftin Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..bef14777bb30 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3781,6 +3781,9 @@ static int pcie_flr(struct pci_dev *dev, int probe) if (!(cap & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET) + return -ENOTTY; + if (probe) return 0; @@ -3801,6 +3804,9 @@ static int pci_af_flr(struct pci_dev *dev, int probe) if (!pos) return -ENOTTY; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET) + return -ENOTTY; + pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; -- cgit v1.2.3 From ea629d873f3e140fb2e3181c30413e485ee9002b Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Wed, 15 Feb 2017 14:50:22 +0800 Subject: PCI: Ignore requested alignment for IOV BARs We would call pci_reassigndev_resource_alignment() before pci_init_capabilities(). So the requested alignment would never work for IOV BARs. Furthermore, it's meaningless to request additional alignment for IOV BARs, the IOV BAR alignment is only determined by the VF BAR size. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..679af2a253ad 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5084,7 +5084,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) command &= ~PCI_COMMAND_MEMORY; pci_write_config_word(dev, PCI_COMMAND, command); - for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &dev->resource[i]; if (!(r->flags & IORESOURCE_MEM)) continue; -- cgit v1.2.3 From ea00353f36b64375518662a8ad15e39218a1f324 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 18 Apr 2017 20:44:30 +0200 Subject: PCI: Freeze PME scan before suspending devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Laurent Pinchart reported that the Renesas R-Car H2 Lager board (r8a7790) crashes during suspend tests. Geert Uytterhoeven managed to reproduce the issue on an M2-W Koelsch board (r8a7791): It occurs when the PME scan runs, once per second. During PME scan, the PCI host bridge (rcar-pci) registers are accessed while its module clock has already been disabled, leading to the crash. One reproducer is to configure s2ram to use "s2idle" instead of "deep" suspend: # echo 0 > /sys/module/printk/parameters/console_suspend # echo s2idle > /sys/power/mem_sleep # echo mem > /sys/power/state Another reproducer is to write either "platform" or "processors" to /sys/power/pm_test. It does not (or is less likely) to happen during full system suspend ("core" or "none") because system suspend also disables timers, and thus the workqueue handling PME scans no longer runs. Geert believes the issue may still happen in the small window between disabling module clocks and disabling timers: # echo 0 > /sys/module/printk/parameters/console_suspend # echo platform > /sys/power/pm_test # Or "processors" # echo mem > /sys/power/state (Make sure CONFIG_PCI_RCAR_GEN2 and CONFIG_USB_OHCI_HCD_PCI are enabled.) Rafael Wysocki agrees that PME scans should be suspended before the host bridge registers become inaccessible. To that end, queue the task on a workqueue that gets frozen before devices suspend. Rafael notes however that as a result, some wakeup events may be missed if they are delivered via PME from a device without working IRQ (which hence must be polled) and occur after the workqueue has been frozen. If that turns out to be an issue in practice, it may be possible to solve it by calling pci_pme_list_scan() once directly from one of the host bridge's pm_ops callbacks. Stacktrace for posterity: PM: Syncing filesystems ... [ 38.566237] done. PM: Preparing system for sleep (mem) Freezing user space processes ... [ 38.579813] (elapsed 0.001 seconds) done. Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done. PM: Suspending system (mem) PM: suspend of devices complete after 152.456 msecs PM: late suspend of devices complete after 2.809 msecs PM: noirq suspend of devices complete after 29.863 msecs suspend debug: Waiting for 5 second(s). Unhandled fault: asynchronous external abort (0x1211) at 0x00000000 pgd = c0003000 [00000000] *pgd=80000040004003, *pmd=00000000 Internal error: : 1211 [#1] SMP ARM Modules linked in: CPU: 1 PID: 20 Comm: kworker/1:1 Not tainted 4.9.0-rc1-koelsch-00011-g68db9bc814362e7f #3383 Hardware name: Generic R8A7791 (Flattened Device Tree) Workqueue: events pci_pme_list_scan task: eb56e140 task.stack: eb58e000 PC is at pci_generic_config_read+0x64/0x6c LR is at rcar_pci_cfg_base+0x64/0x84 pc : [] lr : [] psr: 600d0093 sp : eb58fe98 ip : c041d750 fp : 00000008 r10: c0e2283c r9 : 00000000 r8 : 600d0013 r7 : 00000008 r6 : eb58fed6 r5 : 00000002 r4 : eb58feb4 r3 : 00000000 r2 : 00000044 r1 : 00000008 r0 : 00000000 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5387d Table: 6a9f6c80 DAC: 55555555 Process kworker/1:1 (pid: 20, stack limit = 0xeb58e210) Stack: (0xeb58fe98 to 0xeb590000) fe80: 00000002 00000044 fea0: eb6f5800 c041d9b0 eb58feb4 00000008 00000044 00000000 eb78a000 eb78a000 fec0: 00000044 00000000 eb9aff00 c0424bf0 eb78a000 00000000 eb78a000 c0e22830 fee0: ea8a6fc0 c0424c5c eaae79c0 c0424ce0 eb55f380 c0e22838 eb9a9800 c0235fbc ff00: eb55f380 c0e22838 eb55f380 eb9a9800 eb9a9800 eb58e000 eb9a9824 c0e02100 ff20: eb55f398 c02366c4 eb56e140 eb5631c0 00000000 eb55f380 c023641c 00000000 ff40: 00000000 00000000 00000000 c023a928 cd105598 00000000 40506a34 eb55f380 ff60: 00000000 00000000 dead4ead ffffffff ffffffff eb58ff74 eb58ff74 00000000 ff80: 00000000 dead4ead ffffffff ffffffff eb58ff90 eb58ff90 eb58ffac eb5631c0 ffa0: c023a844 00000000 00000000 c0206d68 00000000 00000000 00000000 00000000 ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 3a81336c 10ccd1dd [] (pci_generic_config_read) from [] (pci_bus_read_config_word+0x58/0x80) [] (pci_bus_read_config_word) from [] (pci_check_pme_status+0x34/0x78) [] (pci_check_pme_status) from [] (pci_pme_wakeup+0x28/0x54) [] (pci_pme_wakeup) from [] (pci_pme_list_scan+0x58/0xb4) [] (pci_pme_list_scan) from [] (process_one_work+0x1bc/0x308) [] (process_one_work) from [] (worker_thread+0x2a8/0x3e0) [] (worker_thread) from [] (kthread+0xe4/0xfc) [] (kthread) from [] (ret_from_fork+0x14/0x2c) Code: ea000000 e5903000 f57ff04f e3a00000 (e5843000) ---[ end trace 667d43ba3aa9e589 ]--- Fixes: df17e62e5bff ("PCI: Add support for polling PME state on suspended legacy PCI devices") Reported-and-tested-by: Laurent Pinchart Reported-and-tested-by: Geert Uytterhoeven Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Laurent Pinchart Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # 2.6.37+ Cc: Mika Westerberg Cc: Niklas Söderlund Cc: Simon Horman Cc: Yinghai Lu Cc: Matthew Garrett --- drivers/pci/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9779483f81fd..6541cba8eef0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1784,8 +1784,8 @@ static void pci_pme_list_scan(struct work_struct *work) } } if (!list_empty(&pci_pme_list)) - schedule_delayed_work(&pci_pme_work, - msecs_to_jiffies(PME_TIMEOUT)); + queue_delayed_work(system_freezable_wq, &pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); mutex_unlock(&pci_pme_list_mutex); } @@ -1850,8 +1850,9 @@ void pci_pme_active(struct pci_dev *dev, bool enable) mutex_lock(&pci_pme_list_mutex); list_add(&pme_dev->list, &pci_pme_list); if (list_is_singular(&pci_pme_list)) - schedule_delayed_work(&pci_pme_work, - msecs_to_jiffies(PME_TIMEOUT)); + queue_delayed_work(system_freezable_wq, + &pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); mutex_unlock(&pci_pme_list_mutex); } else { mutex_lock(&pci_pme_list_mutex); -- cgit v1.2.3 From 0a701aa6378496ea54fb065c68b41d918e372e94 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:12 +0800 Subject: PCI: Add pcibios_default_alignment() for arch-specific alignment control When VFIO passes through a PCI device to a guest, it does not allow the guest to mmap BARs that are smaller than PAGE_SIZE unless it can reserve the rest of the page (see vfio_pci_probe_mmaps()). This is because a page might contain several small BARs for unrelated devices and a guest should not be able to access all of them. VFIO emulates guest accesses to non-mappable BARs, which is functional but slow. On systems with large page sizes, e.g., PowerNV with 64K pages, BARs are more likely to share a page and performance is more likely to be a problem. Add a weak function to set default alignment for all PCI devices. An arch can override it to force the PCI core to place memory BARs on their own pages. Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 679af2a253ad..d22fb35be26e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4947,6 +4947,11 @@ void pci_ignore_hotplug(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ignore_hotplug); +resource_size_t __weak pcibios_default_alignment(void) +{ + return 0; +} + #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; static DEFINE_SPINLOCK(resource_alignment_lock); @@ -4962,14 +4967,15 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) { int seg, bus, slot, func, align_order, count; unsigned short vendor, device, subsystem_vendor, subsystem_device; - resource_size_t align = 0; + resource_size_t align = pcibios_default_alignment(); char *p; spin_lock(&resource_alignment_lock); p = resource_alignment_param; - if (!*p) + if (!*p && !align) goto out; if (pci_has_flag(PCI_PROBE_ONLY)) { + align = 0; pr_info_once("PCI: Ignoring requested alignments (PCI_PROBE_ONLY)\n"); goto out; } -- cgit v1.2.3 From 81a5e70e0de55707c3184d186b2103b5bb9377ec Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 14 Apr 2017 14:12:06 -0500 Subject: PCI: Factor pci_reassigndev_resource_alignment() Pull the BAR size adjustment out into a new function, pci_request_resource_alignment(), and add a comment about how and why we increase the resource size and alignment. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 69 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 22 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d22fb35be26e..1d8bb090a40b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5049,6 +5049,48 @@ out: return align; } +static void pci_request_resource_alignment(struct pci_dev *dev, int bar, + resource_size_t align) +{ + struct resource *r = &dev->resource[bar]; + resource_size_t size; + + if (!(r->flags & IORESOURCE_MEM)) + return; + + if (r->flags & IORESOURCE_PCI_FIXED) { + dev_info(&dev->dev, "BAR%d %pR: ignoring requested alignment %#llx\n", + bar, r, (unsigned long long)align); + return; + } + + size = resource_size(r); + if (size < align) { + + /* + * Increase the size of the resource. BARs are aligned on + * their size, so when we reallocate space for this + * resource, we'll allocate it with the larger alignment. + * It also prevents assignment of any other BARs inside the + * size. If we're requesting page alignment, this means no + * other BARs will share the page. + * + * This makes the resource larger than the hardware BAR, + * which may break drivers that compute things based on the + * resource size, e.g., to find registers at a fixed offset + * before the end of the BAR. We hope users don't request + * alignment for such devices. + */ + size = align; + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", + bar, r, (unsigned long long)align); + + } + r->flags |= IORESOURCE_UNSET; + r->end = size - 1; + r->start = 0; +} + /* * This function disables memory decoding and releases memory resources * of the device specified by kernel's boot parameter 'pci=resource_alignment='. @@ -5060,7 +5102,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) { int i; struct resource *r; - resource_size_t align, size; + resource_size_t align; u16 command; /* @@ -5090,28 +5132,11 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) command &= ~PCI_COMMAND_MEMORY; pci_write_config_word(dev, PCI_COMMAND, command); - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - r = &dev->resource[i]; - if (!(r->flags & IORESOURCE_MEM)) - continue; - if (r->flags & IORESOURCE_PCI_FIXED) { - dev_info(&dev->dev, "Ignoring requested alignment for BAR%d: %pR\n", - i, r); - continue; - } + for (i = 0; i <= PCI_ROM_RESOURCE; i++) + pci_request_resource_alignment(dev, i, align); - size = resource_size(r); - if (size < align) { - size = align; - dev_info(&dev->dev, - "Rounding up size of resource #%d to %#llx.\n", - i, (unsigned long long)size); - } - r->flags |= IORESOURCE_UNSET; - r->end = size - 1; - r->start = 0; - } - /* Need to disable bridge's resource window, + /* + * Need to disable bridge's resource window, * to enable the kernel to reassign new resource * window later on. */ -- cgit v1.2.3 From 0dde1c08d1b9dea01cefb327dba8a6e3ae795214 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 17 Apr 2017 15:20:58 -0500 Subject: PCI: Don't reassign resources that are already aligned The "pci=resource_alignment=" kernel argument designates devices for which we want alignment greater than is required by the PCI specs. Previously we set IORESOURCE_UNSET for every MEM resource of those devices, even if the resource was *already* sufficiently aligned. If a resource is already sufficiently aligned, leave it alone and don't try to reassign it. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1d8bb090a40b..7eb6eb384e36 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5065,30 +5065,28 @@ static void pci_request_resource_alignment(struct pci_dev *dev, int bar, } size = resource_size(r); - if (size < align) { + if (size >= align) + return; - /* - * Increase the size of the resource. BARs are aligned on - * their size, so when we reallocate space for this - * resource, we'll allocate it with the larger alignment. - * It also prevents assignment of any other BARs inside the - * size. If we're requesting page alignment, this means no - * other BARs will share the page. - * - * This makes the resource larger than the hardware BAR, - * which may break drivers that compute things based on the - * resource size, e.g., to find registers at a fixed offset - * before the end of the BAR. We hope users don't request - * alignment for such devices. - */ - size = align; - dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", - bar, r, (unsigned long long)align); + /* + * Increase the size of the resource. BARs are aligned on their + * size, so when we reallocate space for this resource, we'll + * allocate it with the larger alignment. It also prevents + * assignment of any other BARs inside the size. If we're + * requesting page alignment, this means no other BARs will share + * the page. + * + * This makes the resource larger than the hardware BAR, which may + * break drivers that compute things based on the resource size, + * e.g., to find registers at a fixed offset before the end of the + * BAR. We hope users don't request alignment for such devices. + */ + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", + bar, r, (unsigned long long)align); - } - r->flags |= IORESOURCE_UNSET; - r->end = size - 1; r->start = 0; + r->end = align - 1; + r->flags |= IORESOURCE_UNSET; } /* -- cgit v1.2.3 From e3adec72a3c50b733eb277a9f93c044f59ff55eb Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 10 Apr 2017 19:58:14 +0800 Subject: PCI: Don't resize resources when realigning all devices in system The "pci=resource_alignment" argument aligns BARs of designated devices by artificially increasing their size. Increasing the size increases the alignment and prevents other resources from being assigned in the same alignment region, e.g., in the same page, but it can break drivers that use the BAR size to locate things, e.g., ilo_map_device() does this: off = pci_resource_len(pdev, bar) - 0x2000; The new pcibios_default_alignment() interface allows an arch to request that *all* BARs in the system be aligned to a larger size. In this case, we don't need to artificially increase the resource size because we know every BAR of every device will be realigned, so nothing will share the same alignment region. Use IORESOURCE_STARTALIGN to request realignment of PCI BARs when we know we're realigning all BARs in the system. [bhelgaas: comment, changelog] Signed-off-by: Yongji Xie Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 59 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 16 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7eb6eb384e36..c140cb29ee7e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4959,11 +4959,13 @@ static DEFINE_SPINLOCK(resource_alignment_lock); /** * pci_specified_resource_alignment - get resource alignment specified by user. * @dev: the PCI device to get + * @resize: whether or not to change resources' size when reassigning alignment * * RETURNS: Resource alignment if it is specified. * Zero if it is not specified. */ -static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev, + bool *resize) { int seg, bus, slot, func, align_order, count; unsigned short vendor, device, subsystem_vendor, subsystem_device; @@ -5005,6 +5007,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) (!device || (device == dev->device)) && (!subsystem_vendor || (subsystem_vendor == dev->subsystem_vendor)) && (!subsystem_device || (subsystem_device == dev->subsystem_device))) { + *resize = true; if (align_order == -1) align = PAGE_SIZE; else @@ -5030,6 +5033,7 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) bus == dev->bus->number && slot == PCI_SLOT(dev->devfn) && func == PCI_FUNC(dev->devfn)) { + *resize = true; if (align_order == -1) align = PAGE_SIZE; else @@ -5050,7 +5054,7 @@ out: } static void pci_request_resource_alignment(struct pci_dev *dev, int bar, - resource_size_t align) + resource_size_t align, bool resize) { struct resource *r = &dev->resource[bar]; resource_size_t size; @@ -5069,23 +5073,45 @@ static void pci_request_resource_alignment(struct pci_dev *dev, int bar, return; /* - * Increase the size of the resource. BARs are aligned on their - * size, so when we reallocate space for this resource, we'll - * allocate it with the larger alignment. It also prevents - * assignment of any other BARs inside the size. If we're - * requesting page alignment, this means no other BARs will share - * the page. + * Increase the alignment of the resource. There are two ways we + * can do this: * - * This makes the resource larger than the hardware BAR, which may - * break drivers that compute things based on the resource size, - * e.g., to find registers at a fixed offset before the end of the - * BAR. We hope users don't request alignment for such devices. + * 1) Increase the size of the resource. BARs are aligned on their + * size, so when we reallocate space for this resource, we'll + * allocate it with the larger alignment. This also prevents + * assignment of any other BARs inside the alignment region, so + * if we're requesting page alignment, this means no other BARs + * will share the page. + * + * The disadvantage is that this makes the resource larger than + * the hardware BAR, which may break drivers that compute things + * based on the resource size, e.g., to find registers at a + * fixed offset before the end of the BAR. + * + * 2) Retain the resource size, but use IORESOURCE_STARTALIGN and + * set r->start to the desired alignment. By itself this + * doesn't prevent other BARs being put inside the alignment + * region, but if we realign *every* resource of every device in + * the system, none of them will share an alignment region. + * + * When the user has requested alignment for only some devices via + * the "pci=resource_alignment" argument, "resize" is true and we + * use the first method. Otherwise we assume we're aligning all + * devices and we use the second. */ + dev_info(&dev->dev, "BAR%d %pR: requesting alignment to %#llx\n", bar, r, (unsigned long long)align); - r->start = 0; - r->end = align - 1; + if (resize) { + r->start = 0; + r->end = align - 1; + } else { + r->flags &= ~IORESOURCE_SIZEALIGN; + r->flags |= IORESOURCE_STARTALIGN; + r->start = align; + r->end = r->start + size - 1; + } r->flags |= IORESOURCE_UNSET; } @@ -5102,6 +5128,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) struct resource *r; resource_size_t align; u16 command; + bool resize = false; /* * VF BARs are read-only zero according to SR-IOV spec r1.1, sec @@ -5113,7 +5140,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) return; /* check if specified PCI is target device to reassign */ - align = pci_specified_resource_alignment(dev); + align = pci_specified_resource_alignment(dev, &resize); if (!align) return; @@ -5131,7 +5158,7 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev) pci_write_config_word(dev, PCI_COMMAND, command); for (i = 0; i <= PCI_ROM_RESOURCE; i++) - pci_request_resource_alignment(dev, i, align); + pci_request_resource_alignment(dev, i, align, resize); /* * Need to disable bridge's resource window, -- cgit v1.2.3 From 7b309aef0463340d3ad5449d1f605d14e10a4225 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 19 Apr 2017 17:48:50 +0100 Subject: PCI: Remove __weak tag from pci_remap_iospace() pci_remap_iospace() is marked as a weak symbol even though no architecture is currently overriding it; given that its implementation internals have already code paths that are arch specific (ie PCI_IOBASE and ioremap_page_range() attributes) there is no need to leave the weak symbol in the kernel since the same functionality can be achieved by customizing per-arch the corresponding functionality. Remove the __weak symbol from pci_remap_iospace(). Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Arnd Bergmann --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..bd98674a0419 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3363,7 +3363,7 @@ unsigned long __weak pci_address_to_pio(phys_addr_t address) * Only architectures that have memory mapped IO functions defined * (and the PCI_IOBASE value defined) should call this function. */ -int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) +int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) { #if defined(PCI_IOBASE) && defined(CONFIG_MMU) unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start; -- cgit v1.2.3 From a60a2b73ba69abca26653fff157b0fd8947bc498 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Apr 2017 21:11:25 +0200 Subject: PCI: Export pcie_flr() Currently we opencode the FLR sequence in lots of place; export a core helper instead. We split out the probing for FLR support as all the non-core callers already know their hardware. Note that in the new pci_has_flr() function the quirk check has been moved before the capability check as there is no point in reading the capability in this case. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bef14777bb30..957a11a6a840 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3773,27 +3773,41 @@ static void pci_flr_wait(struct pci_dev *dev) (i - 1) * 100); } -static int pcie_flr(struct pci_dev *dev, int probe) +/** + * pcie_has_flr - check if a device supports function level resets + * @dev: device to check + * + * Returns true if the device advertises support for PCIe function level + * resets. + */ +static bool pcie_has_flr(struct pci_dev *dev) { u32 cap; - pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); - if (!(cap & PCI_EXP_DEVCAP_FLR)) - return -ENOTTY; - if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET) - return -ENOTTY; + return false; - if (probe) - return 0; + pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); + return cap & PCI_EXP_DEVCAP_FLR; +} +/** + * pcie_flr - initiate a PCIe function level reset + * @dev: device to reset + * + * Initiate a function level reset on @dev. The caller should ensure the + * device supports FLR before calling this function, e.g. by using the + * pcie_has_flr() helper. + */ +void pcie_flr(struct pci_dev *dev) +{ if (!pci_wait_for_pending_transaction(dev)) dev_err(&dev->dev, "timed out waiting for pending transaction; performing function level reset anyway\n"); pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); pci_flr_wait(dev); - return 0; } +EXPORT_SYMBOL_GPL(pcie_flr); static int pci_af_flr(struct pci_dev *dev, int probe) { @@ -3977,9 +3991,12 @@ static int __pci_dev_reset(struct pci_dev *dev, int probe) if (rc != -ENOTTY) goto done; - rc = pcie_flr(dev, probe); - if (rc != -ENOTTY) + if (pcie_has_flr(dev)) { + if (!probe) + pcie_flr(dev); + rc = 0; goto done; + } rc = pci_af_flr(dev, probe); if (rc != -ENOTTY) -- cgit v1.2.3 From f90b0875463e450c5dcb714fb887720ecb53a600 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 9 Mar 2017 18:46:16 -0800 Subject: PCI: Export pci_remap_iospace() and pci_unmap_iospace() These are useful for PCIe host drivers, and those drivers can be modules. [bhelgaas: don't remove __weak; it's removed elsewhere] Signed-off-by: Brian Norris Signed-off-by: Bjorn Helgaas Acked-by: Shawn Lin --- drivers/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..c87d1edf0203 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3383,6 +3383,7 @@ int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) return -ENODEV; #endif } +EXPORT_SYMBOL(pci_remap_iospace); /** * pci_unmap_iospace - Unmap the memory mapped I/O space @@ -3400,6 +3401,7 @@ void pci_unmap_iospace(struct resource *res) unmap_kernel_range(vaddr, resource_size(res)); #endif } +EXPORT_SYMBOL(pci_unmap_iospace); static void __pci_set_master(struct pci_dev *dev, bool enable) { -- cgit v1.2.3 From 490cb6ddb17df5ef5f5eb33c9a34f3033b31c204 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 19 Apr 2017 17:48:55 +0100 Subject: PCI: Implement devm_pci_remap_cfgspace() The introduction of the pci_remap_cfgspace() interface allows PCI host controller drivers to map PCI config space through a dedicated kernel interface. Current PCI host controller drivers use the devm_ioremap_*() devres interfaces to map PCI configuration space regions so in order to update them to the new pci_remap_cfgspace() mapping interface a new set of devres interfaces should be implemented so that PCI host controller drivers can make use of them. Introduce two new functions in the PCI kernel layer and Devres documentation: - devm_pci_remap_cfgspace() - devm_pci_remap_cfg_resource() so that PCI host controller drivers can make use of them to map PCI configuration space regions. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet --- drivers/pci/pci.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bd98674a0419..4129f9453861 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3401,6 +3401,88 @@ void pci_unmap_iospace(struct resource *res) #endif } +/** + * devm_pci_remap_cfgspace - Managed pci_remap_cfgspace() + * @dev: Generic device to remap IO address for + * @offset: Resource address to map + * @size: Size of map + * + * Managed pci_remap_cfgspace(). Map is automatically unmapped on driver + * detach. + */ +void __iomem *devm_pci_remap_cfgspace(struct device *dev, + resource_size_t offset, + resource_size_t size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = pci_remap_cfgspace(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_pci_remap_cfgspace); + +/** + * devm_pci_remap_cfg_resource - check, request region and ioremap cfg resource + * @dev: generic device to handle the resource for + * @res: configuration space resource to be handled + * + * Checks that a resource is a valid memory region, requests the memory + * region and ioremaps with pci_remap_cfgspace() API that ensures the + * proper PCI configuration space memory attributes are guaranteed. + * + * All operations are managed and will be undone on driver detach. + * + * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code + * on failure. Usage example: + * + * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + * base = devm_pci_remap_cfg_resource(&pdev->dev, res); + * if (IS_ERR(base)) + * return PTR_ERR(base); + */ +void __iomem *devm_pci_remap_cfg_resource(struct device *dev, + struct resource *res) +{ + resource_size_t size; + const char *name; + void __iomem *dest_ptr; + + BUG_ON(!dev); + + if (!res || resource_type(res) != IORESOURCE_MEM) { + dev_err(dev, "invalid resource\n"); + return IOMEM_ERR_PTR(-EINVAL); + } + + size = resource_size(res); + name = res->name ?: dev_name(dev); + + if (!devm_request_mem_region(dev, res->start, size, name)) { + dev_err(dev, "can't request region for resource %pR\n", res); + return IOMEM_ERR_PTR(-EBUSY); + } + + dest_ptr = devm_pci_remap_cfgspace(dev, res->start, size); + if (!dest_ptr) { + dev_err(dev, "ioremap failed for resource %pR\n", res); + devm_release_mem_region(dev, res->start, size); + dest_ptr = IOMEM_ERR_PTR(-ENOMEM); + } + + return dest_ptr; +} +EXPORT_SYMBOL(devm_pci_remap_cfg_resource); + static void __pci_set_master(struct pci_dev *dev, bool enable) { u16 old_cmd, cmd; -- cgit v1.2.3