diff options
323 files changed, 9270 insertions, 5418 deletions
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index d718bc2ff1cf..bf5dbe3ab8c5 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -18,8 +18,8 @@ Introduction Datagram Congestion Control Protocol (DCCP) is an unreliable, connection oriented protocol designed to solve issues present in UDP and TCP, particularly for real-time and multimedia (streaming) traffic. -It divides into a base protocol (RFC 4340) and plugable congestion control -modules called CCIDs. Like plugable TCP congestion control, at least one CCID +It divides into a base protocol (RFC 4340) and pluggable congestion control +modules called CCIDs. Like pluggable TCP congestion control, at least one CCID needs to be enabled in order for the protocol to function properly. In the Linux implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as the TCP-friendly CCID3 (RFC 4342), are optional. diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt index 13a32124bca0..f862cf3aff34 100644 --- a/Documentation/networking/e100.txt +++ b/Documentation/networking/e100.txt @@ -103,7 +103,7 @@ Additional Configurations PRO/100 Family of Adapters is e100. As an example, if you install the e100 driver for two PRO/100 adapters - (eth0 and eth1), add the following to a configuraton file in /etc/modprobe.d/ + (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/ alias eth0 e100 alias eth1 e100 diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index 09eb57329f11..22bbc7225f8e 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -4,7 +4,7 @@ Introduction ============ -The IEEE 802.15.4 working group focuses on standartization of bottom +The IEEE 802.15.4 working group focuses on standardization of bottom two layers: Medium Access Control (MAC) and Physical (PHY). And there are mainly two options available for upper layers: - ZigBee - proprietary protocol from ZigBee Alliance @@ -66,7 +66,7 @@ net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family code via plain sk_buffs. On skb reception skb->cb must contain additional info as described in the struct ieee802154_mac_cb. During packet transmission the skb->cb is used to provide additional data to device's header_ops->create -function. Be aware, that this data can be overriden later (when socket code +function. Be aware that this data can be overridden later (when socket code submits skb to qdisc), so if you need something from that cb later, you should store info in the skb->data on your own. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a46d78583ae1..8b8a05787641 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -267,17 +267,6 @@ tcp_max_orphans - INTEGER more aggressively. Let me to remind again: each orphan eats up to ~64K of unswappable memory. -tcp_max_ssthresh - INTEGER - Limited Slow-Start for TCP with large congestion windows (cwnd) defined in - RFC3742. Limited slow-start is a mechanism to limit growth of the cwnd - on the region where cwnd is larger than tcp_max_ssthresh. TCP increases cwnd - by at most tcp_max_ssthresh segments, and by at least tcp_max_ssthresh/2 - segments per RTT when the cwnd is above tcp_max_ssthresh. - If TCP connection increased cwnd to thousands (or tens of thousands) segments, - and thousands of packets were being dropped during slow-start, you can set - tcp_max_ssthresh to improve performance for new TCP connection. - Default: 0 (off) - tcp_max_syn_backlog - INTEGER Maximal number of remembered connection requests, which have not received an acknowledgment from connecting client. @@ -451,7 +440,7 @@ tcp_fastopen - INTEGER connect() to perform a TCP handshake automatically. The values (bitmap) are - 1: Enables sending data in the opening SYN on the client. + 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN. 2: Enables TCP Fast Open on the server side, i.e., allowing data in a SYN packet to be accepted and passed to the application before 3-way hand shake finishes. @@ -464,7 +453,7 @@ tcp_fastopen - INTEGER different ways of setting max_qlen without the TCP_FASTOPEN socket option. - Default: 0 + Default: 1 Note that the client & server side Fast Open flags (1 and 2 respectively) must be also enabled before the rest of flags can take diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt index e63fc1f7bf87..c74434de2fa5 100644 --- a/Documentation/networking/l2tp.txt +++ b/Documentation/networking/l2tp.txt @@ -197,7 +197,7 @@ state information because the file format is subject to change. It is implemented to provide extra debug information to help diagnose problems.) Users should use the netlink API. -/proc/net/pppol2tp is also provided for backwards compaibility with +/proc/net/pppol2tp is also provided for backwards compatibility with the original pppol2tp driver. It lists information about L2TPv2 tunnels and sessions only. Its use is discouraged. diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index d9112f01c44a..0fe1c6e0dbcd 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -4,23 +4,23 @@ Information you need to know about netdev Q: What is netdev? -A: It is a mailing list for all network related linux stuff. This includes +A: It is a mailing list for all network-related Linux stuff. This includes anything found under net/ (i.e. core code like IPv6) and drivers/net - (i.e. hardware specific drivers) in the linux source tree. + (i.e. hardware specific drivers) in the Linux source tree. Note that some subsystems (e.g. wireless drivers) which have a high volume of traffic have their own specific mailing lists. - The netdev list is managed (like many other linux mailing lists) through + The netdev list is managed (like many other Linux mailing lists) through VGER ( http://vger.kernel.org/ ) and archives can be found below: http://marc.info/?l=linux-netdev http://www.spinics.net/lists/netdev/ - Aside from subsystems like that mentioned above, all network related linux - development (i.e. RFC, review, comments, etc) takes place on netdev. + Aside from subsystems like that mentioned above, all network-related Linux + development (i.e. RFC, review, comments, etc.) takes place on netdev. -Q: How do the changes posted to netdev make their way into linux? +Q: How do the changes posted to netdev make their way into Linux? A: There are always two trees (git repositories) in play. Both are driven by David Miller, the main network maintainer. There is the "net" tree, @@ -35,7 +35,7 @@ A: There are always two trees (git repositories) in play. Both are driven Q: How often do changes from these trees make it to the mainline Linus tree? A: To understand this, you need to know a bit of background information - on the cadence of linux development. Each new release starts off with + on the cadence of Linux development. Each new release starts off with a two week "merge window" where the main maintainers feed their new stuff to Linus for merging into the mainline tree. After the two weeks, the merge window is closed, and it is called/tagged "-rc1". No new @@ -46,7 +46,7 @@ A: To understand this, you need to know a bit of background information things are in a state of churn), and a week after the last vX.Y-rcN was done, the official "vX.Y" is released. - Relating that to netdev: At the beginning of the 2 week merge window, + Relating that to netdev: At the beginning of the 2-week merge window, the net-next tree will be closed - no new changes/features. The accumulated new content of the past ~10 weeks will be passed onto mainline/Linus via a pull request for vX.Y -- at the same time, @@ -59,16 +59,16 @@ A: To understand this, you need to know a bit of background information IMPORTANT: Do not send new net-next content to netdev during the period during which net-next tree is closed. - Shortly after the two weeks have passed, (and vX.Y-rc1 is released) the + Shortly after the two weeks have passed (and vX.Y-rc1 is released), the tree for net-next reopens to collect content for the next (vX.Y+1) release. If you aren't subscribed to netdev and/or are simply unsure if net-next has re-opened yet, simply check the net-next git repository link above for - any new networking related commits. + any new networking-related commits. The "net" tree continues to collect fixes for the vX.Y content, and is fed back to Linus at regular (~weekly) intervals. Meaning that the - focus for "net" is on stablilization and bugfixes. + focus for "net" is on stabilization and bugfixes. Finally, the vX.Y gets released, and the whole cycle starts over. @@ -217,7 +217,7 @@ A: Attention to detail. Re-read your own work as if you were the to why it happens, and then if necessary, explain why the fix proposed is the best way to get things done. Don't mangle whitespace, and as is common, don't mis-indent function arguments that span multiple lines. - If it is your 1st patch, mail it to yourself so you can test apply + If it is your first patch, mail it to yourself so you can test apply it to an unpatched tree to confirm infrastructure didn't mangle it. Finally, go back and read Documentation/SubmittingPatches to be diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index c7ecc7080494..0b1cf6b2a592 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -10,12 +10,12 @@ network devices. struct net_device allocation rules ================================== Network device structures need to persist even after module is unloaded and -must be allocated with kmalloc. If device has registered successfully, -it will be freed on last use by free_netdev. This is required to handle the -pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu ) +must be allocated with alloc_netdev_mqs() and friends. +If device has registered successfully, it will be freed on last use +by free_netdev(). This is required to handle the pathologic case cleanly +(example: rmmod mydriver </sys/class/net/myeth/mtu ) -There are routines in net_init.c to handle the common cases of -alloc_etherdev, alloc_netdev. These reserve extra space for driver +alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver private data which gets freed when the network device is freed. If separately allocated data is attached to the network device (netdev_priv(dev)) then it is up to the module exit handler to free that. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index 533378839546..b26122973525 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -45,7 +45,7 @@ processing. Conversion of the reception path involves calling poll() on the file descriptor, once the socket is readable the frames from the ring are -processsed in order until no more messages are available, as indicated by +processed in order until no more messages are available, as indicated by a status word in the frame header. On kernel side, in order to make use of memory mapped I/O on receive, the @@ -56,7 +56,7 @@ Dumps of kernel databases automatically support memory mapped I/O. Conversion of the transmit path involves changing message construction to use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header approriately. Optionally poll() can +stack and setting up the frame header appropriately. Optionally poll() can be used to wait for free frames in the TX ring. Structured and definitions for using memory mapped I/O are contained in @@ -231,7 +231,7 @@ Ring setup: if (setsockopt(fd, NETLINK_TX_RING, &req, sizeof(req)) < 0) exit(1) - /* Calculate size of each invididual ring */ + /* Calculate size of each individual ring */ ring_size = req.nm_block_nr * req.nm_block_size; /* Map RX/TX rings. The TX ring is located after the RX ring */ diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 97694572338b..355c6d8ef8ad 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -89,8 +89,8 @@ packets. The name 'carrier' and the inversion are historical, think of it as lower layer. Note that for certain kind of soft-devices, which are not managing any -real hardware, there is possible to set this bit from userpsace. -One should use TVL IFLA_CARRIER to do so. +real hardware, it is possible to set this bit from userspace. One +should use TVL IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 60d05eb77c64..b89bc82eed46 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -144,7 +144,7 @@ An overview of the RxRPC protocol: (*) Calls use ACK packets to handle reliability. Data packets are also explicitly sequenced per call. - (*) There are two types of positive acknowledgement: hard-ACKs and soft-ACKs. + (*) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs. A hard-ACK indicates to the far side that all the data received to a point has been received and processed; a soft-ACK indicates that the data has been received but may yet be discarded and re-requested. The sender may diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 457b8bbafb08..cdd916da838d 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -160,7 +160,7 @@ Where: o pmt: core has the embedded power module (optional). o force_sf_dma_mode: force DMA to use the Store and Forward mode instead of the Threshold. - o force_thresh_dma_mode: force DMA to use the Shreshold mode other than + o force_thresh_dma_mode: force DMA to use the Threshold mode other than the Store and Forward mode. o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode. o fix_mac_speed: this callback is used for modifying some syscfg registers @@ -175,7 +175,7 @@ Where: registers. o custom_cfg/custom_data: this is a custom configuration that can be passed while initializing the resources. - o bsp_priv: another private poiter. + o bsp_priv: another private pointer. For MDIO bus The we have: @@ -271,7 +271,7 @@ reset procedure etc). o dwmac1000_dma.c: dma functions for the GMAC chip; o dwmac1000.h: specific header file for the GMAC; o dwmac100_core: MAC 100 core and dma code; - o dwmac100_dma.c: dma funtions for the MAC chip; + o dwmac100_dma.c: dma functions for the MAC chip; o dwmac1000.h: specific header file for the MAC; o dwmac_lib.c: generic DMA functions shared among chips; o enh_desc.c: functions for handling enhanced descriptors; @@ -364,4 +364,4 @@ Auto-negotiated Link Parter Ability. 10) TODO: o XGMAC is not supported. o Complete the TBI & RTBI support. - o extened VLAN support for 3.70a SYNP GMAC. + o extend VLAN support for 3.70a SYNP GMAC. diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 9a8041dcbb53..97282da82b75 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -68,7 +68,7 @@ Module parameters There are several parameters which may be provided to the driver when its module is loaded. These are usually placed in /etc/modprobe.d/*.conf -configuretion files. Example: +configuration files. Example: options 3c59x debug=3 rx_copybreak=300 @@ -178,7 +178,7 @@ max_interrupt_work=N The driver's interrupt service routine can handle many receive and transmit packets in a single invocation. It does this in a loop. - The value of max_interrupt_work governs how mnay times the interrupt + The value of max_interrupt_work governs how many times the interrupt service routine will loop. The default value is 32 loops. If this is exceeded the interrupt service routine gives up and generates a warning message "eth0: Too much work in interrupt". diff --git a/Documentation/networking/x25-iface.txt b/Documentation/networking/x25-iface.txt index 78f662ee0622..7f213b556e85 100644 --- a/Documentation/networking/x25-iface.txt +++ b/Documentation/networking/x25-iface.txt @@ -105,7 +105,7 @@ reduced by the following measures or a combination thereof: later. The lapb module interface was modified to support this. Its data_indication() method should now transparently pass the - netif_rx() return value to the (lapb mopdule) caller. + netif_rx() return value to the (lapb module) caller. (2) Drivers for kernel versions 2.2.x should always check the global variable netdev_dropping when a new frame is received. The driver should only call netif_rx() if netdev_dropping is zero. Otherwise diff --git a/MAINTAINERS b/MAINTAINERS index 66469731e78d..727d7e359182 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1009,6 +1009,7 @@ ARM/Marvell Armada 370 and Armada XP SOC support M: Jason Cooper <jason@lakedaemon.net> M: Andrew Lunn <andrew@lunn.ch> M: Gregory Clement <gregory.clement@free-electrons.com> +M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ @@ -1016,6 +1017,7 @@ F: arch/arm/mach-mvebu/ ARM/Marvell Dove/Kirkwood/MV78xx0/Orion SOC support M: Jason Cooper <jason@lakedaemon.net> M: Andrew Lunn <andrew@lunn.ch> +M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-dove/ @@ -1148,6 +1150,13 @@ F: drivers/net/ethernet/i825xx/ether1* F: drivers/net/ethernet/seeq/ether3* F: drivers/scsi/arm/ +ARM/Rockchip SoC support +M: Heiko Stuebner <heiko@sntech.de> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-rockchip/ +F: drivers/*/*rockchip* + ARM/SHARK MACHINE SUPPORT M: Alexander Schulz <alex@shark-linux.de> W: http://www.shark-linux.de/shark.html @@ -2719,6 +2728,8 @@ T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git DMA GENERIC OFFLOAD ENGINE SUBSYSTEM M: Vinod Koul <vinod.koul@intel.com> M: Dan Williams <dan.j.williams@intel.com> +L: dmaengine@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ S: Supported F: drivers/dma/ F: include/linux/dma* @@ -2822,7 +2833,7 @@ M: Terje Bergström <tbergstrom@nvidia.com> L: dri-devel@lists.freedesktop.org L: linux-tegra@vger.kernel.org T: git git://anongit.freedesktop.org/tegra/linux.git -S: Maintained +S: Supported F: drivers/gpu/host1x/ F: include/uapi/drm/tegra_drm.h F: Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt @@ -4358,7 +4369,10 @@ F: arch/x86/kernel/microcode_intel.c INTEL I/OAT DMA DRIVER M: Dan Williams <dan.j.williams@intel.com> -S: Maintained +M: Dave Jiang <dave.jiang@intel.com> +L: dmaengine@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ +S: Supported F: drivers/dma/ioat* INTEL IOMMU (VT-d) @@ -8310,14 +8324,72 @@ L: linux-media@vger.kernel.org S: Maintained F: drivers/media/rc/ttusbir.c -TEGRA SUPPORT +TEGRA ARCHITECTURE SUPPORT M: Stephen Warren <swarren@wwwdotorg.org> +M: Thierry Reding <thierry.reding@gmail.com> L: linux-tegra@vger.kernel.org Q: http://patchwork.ozlabs.org/project/linux-tegra/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git S: Supported N: [^a-z]tegra +TEGRA ASOC DRIVER +M: Stephen Warren <swarren@wwwdotorg.org> +S: Supported +F: sound/soc/tegra/ + +TEGRA CLOCK DRIVER +M: Peter De Schrijver <pdeschrijver@nvidia.com> +M: Prashant Gaikwad <pgaikwad@nvidia.com> +S: Supported +F: drivers/clk/tegra/ + +TEGRA DMA DRIVER +M: Laxman Dewangan <ldewangan@nvidia.com> +S: Supported +F: drivers/dma/tegra20-apb-dma.c + +TEGRA GPIO DRIVER +M: Stephen Warren <swarren@wwwdotorg.org> +S: Supported +F: drivers/gpio/gpio-tegra.c + +TEGRA I2C DRIVER +M: Laxman Dewangan <ldewangan@nvidia.com> +S: Supported +F: drivers/i2c/busses/i2c-tegra.c + +TEGRA IOMMU DRIVERS +M: Hiroshi Doyu <hdoyu@nvidia.com> +S: Supported +F: drivers/iommu/tegra* + +TEGRA KBC DRIVER +M: Rakesh Iyer <riyer@nvidia.com> +M: Laxman Dewangan <ldewangan@nvidia.com> +S: Supported +F: drivers/input/keyboard/tegra-kbc.c + +TEGRA PINCTRL DRIVER +M: Stephen Warren <swarren@wwwdotorg.org> +S: Supported +F: drivers/pinctrl/pinctrl-tegra* + +TEGRA PWM DRIVER +M: Thierry Reding <thierry.reding@gmail.com> +S: Supported +F: drivers/pwm/pwm-tegra.c + +TEGRA SERIAL DRIVER +M: Laxman Dewangan <ldewangan@nvidia.com> +S: Supported +F: drivers/tty/serial/serial-tegra.c + +TEGRA SPI DRIVER +M: Laxman Dewangan <ldewangan@nvidia.com> +S: Supported +F: drivers/spi/spi-tegra* + TEHUTI ETHERNET DRIVER M: Andy Gospodarek <andy@greyhouse.net> L: netdev@vger.kernel.org @@ -8853,61 +8925,14 @@ W: http://pegasus2.sourceforge.net/ S: Maintained F: drivers/net/usb/rtl8150.c -USB SERIAL BELKIN F5U103 DRIVER -M: William Greathouse <wgreathouse@smva.com> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/belkin_sa.* - -USB SERIAL CYPRESS M8 DRIVER -M: Lonnie Mendez <dignome@gmail.com> -L: linux-usb@vger.kernel.org -S: Maintained -W: http://geocities.com/i0xox0i -W: http://firstlight.net/cvs -F: drivers/usb/serial/cypress_m8.* - -USB SERIAL CYBERJACK DRIVER -M: Matthias Bruestle and Harald Welte <support@reiner-sct.com> -W: http://www.reiner-sct.de/support/treiber_cyberjack.php -S: Maintained -F: drivers/usb/serial/cyberjack.c - -USB SERIAL DIGI ACCELEPORT DRIVER -M: Peter Berger <pberger@brimson.com> -M: Al Borchers <alborchers@steinerpoint.com> +USB SERIAL SUBSYSTEM +M: Johan Hovold <jhovold@gmail.com> L: linux-usb@vger.kernel.org S: Maintained -F: drivers/usb/serial/digi_acceleport.c - -USB SERIAL DRIVER -M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -L: linux-usb@vger.kernel.org -S: Supported F: Documentation/usb/usb-serial.txt -F: drivers/usb/serial/generic.c -F: drivers/usb/serial/usb-serial.c +F: drivers/usb/serial/ F: include/linux/usb/serial.h -USB SERIAL EMPEG EMPEG-CAR MARK I/II DRIVER -M: Gary Brubaker <xavyer@ix.netcom.com> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/empeg.c - -USB SERIAL KEYSPAN DRIVER -M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/*keyspan* - -USB SERIAL WHITEHEAT DRIVER -M: Support Department <support@connecttech.com> -L: linux-usb@vger.kernel.org -W: http://www.connecttech.com -S: Supported -F: drivers/usb/serial/whiteheat* - USB SMSC75XX ETHERNET DRIVER M: Steve Glendinning <steve.glendinning@shawell.net> L: netdev@vger.kernel.org @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = One Giant Leap for Frogkind # *DOCUMENTATION* diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d63f3de0cd5b..0c14d8a52683 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -17,7 +17,7 @@ #include <asm/pgalloc.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +static int handle_vmalloc_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset_fast(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) @@ -72,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) * nothing more. */ if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); + ret = handle_vmalloc_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts index ff1aea0ee043..72693a69f830 100644 --- a/arch/arm/boot/dts/integratorcp.dts +++ b/arch/arm/boot/dts/integratorcp.dts @@ -9,11 +9,6 @@ model = "ARM Integrator/CP"; compatible = "arm,integrator-cp"; - aliases { - arm,timer-primary = &timer2; - arm,timer-secondary = &timer1; - }; - chosen { bootargs = "root=/dev/ram0 console=ttyAMA0,38400n8 earlyprintk"; }; @@ -24,14 +19,18 @@ }; timer0: timer@13000000 { + /* TIMER0 runs @ 25MHz */ compatible = "arm,integrator-cp-timer"; + status = "disabled"; }; timer1: timer@13000100 { + /* TIMER1 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; timer2: timer@13000200 { + /* TIMER2 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 45f1ffcf1a4b..24cdf64789c3 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -971,11 +971,11 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map [C(LL)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, }, [C(ITLB)] = { diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index c69da3734699..5b28e81d94a0 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -473,7 +473,7 @@ static void __init fill_ipi_map(void) { int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { fill_ipi_map1(gic_resched_int_base, cpu, GIC_CPU_INT1); fill_ipi_map1(gic_call_int_base, cpu, GIC_CPU_INT2); } @@ -574,8 +574,9 @@ void __init arch_init_irq(void) /* FIXME */ int i; #if defined(CONFIG_MIPS_MT_SMP) - gic_call_int_base = GIC_NUM_INTRS - NR_CPUS; - gic_resched_int_base = gic_call_int_base - NR_CPUS; + gic_call_int_base = GIC_NUM_INTRS - + (NR_CPUS - nr_cpu_ids) * 2 - nr_cpu_ids; + gic_resched_int_base = gic_call_int_base - nr_cpu_ids; fill_ipi_map(); #endif gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, @@ -599,7 +600,7 @@ void __init arch_init_irq(void) printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status()); write_c0_status(0x1100dc00); printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status()); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { arch_init_ipiirq(MIPS_GIC_IRQ_BASE + GIC_RESCHED_INT(i), &irq_resched); arch_init_ipiirq(MIPS_GIC_IRQ_BASE + diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index e49241a2c39a..202785709441 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -126,7 +126,7 @@ static int rt_timer_probe(struct platform_device *pdev) return -ENOENT; } - rt->membase = devm_request_and_ioremap(&pdev->dev, res); + rt->membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(rt->membase)) return PTR_ERR(rt->membase); diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 37aabd772fbb..d2d58258aea6 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -195,6 +195,8 @@ common_stext: ldw MEM_PDC_HI(%r0),%r6 depd %r6, 31, 32, %r3 /* move to upper word */ + mfctl %cr30,%r6 /* PCX-W2 firmware bug */ + ldo PDC_PSW(%r0),%arg0 /* 21 */ ldo PDC_PSW_SET_DEFAULTS(%r0),%arg1 /* 2 */ ldo PDC_PSW_WIDE_BIT(%r0),%arg2 /* 2 */ @@ -203,6 +205,8 @@ common_stext: copy %r0,%arg3 stext_pdc_ret: + mtctl %r6,%cr30 /* restore task thread info */ + /* restore rfi target address*/ ldd TI_TASK-THREAD_SZ_ALGN(%sp), %r10 tophys_r1 %r10 diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 829df49dee99..41ebbfebb333 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -40,9 +40,11 @@ static ssize_t exitcode_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { char *end, buf[sizeof("nnnnn\0")]; + size_t size; int tmp; - if (copy_from_user(buf, buffer, count)) + size = min(count, sizeof(buf)); + if (copy_from_user(buf, buffer, size)) return -EFAULT; tmp = simple_strtol(buf, &end, 0); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0da5200ee79d..b3e18f800302 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -128,7 +128,8 @@ do { \ do { \ typedef typeof(var) pao_T__; \ const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? (val) : 0; \ + ((val) == 1 || (val) == -1)) ? \ + (int)(val) : 0; \ if (0) { \ pao_T__ pao_tmp__; \ pao_tmp__ = (val); \ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d8449158cf9..8a87a3224121 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - int ret; u64 start_clock; u64 finish_clock; + int ret; if (!atomic_read(&active_events)) return NMI_DONE; - start_clock = local_clock(); + start_clock = sched_clock(); ret = x86_pmu.handle_irq(regs); - finish_clock = local_clock(); + finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a0e2a8a80c94..b2046e4d0b59 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug; struct dentry *kvm_init_debugfs(void) { - d_kvm_debug = debugfs_create_dir("kvm", NULL); + d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); if (!d_kvm_debug) printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc2c353..6fcb49ce50a1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 u64 before, delta, whole_msecs; int remainder_ns, decimal_msecs, thishandled; - before = local_clock(); + before = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = local_clock() - before; + delta = sched_clock() - before; trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index de1dfa18d0a1..21dbe6bdb8ed 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1122,7 +1122,7 @@ ENDPROC(fast_syscall_spill_registers) * a3: exctable, original value in excsave1 */ -fast_syscall_spill_registers_fixup: +ENTRY(fast_syscall_spill_registers_fixup) rsr a2, windowbase # get current windowbase (a2 is saved) xsr a0, depc # restore depc and a0 @@ -1134,22 +1134,26 @@ fast_syscall_spill_registers_fixup: */ xsr a3, excsave1 # get spill-mask - slli a2, a3, 1 # shift left by one + slli a3, a3, 1 # shift left by one - slli a3, a2, 32-WSBITS - src a2, a2, a3 # a1 = xxwww1yyxxxwww1yy...... + slli a2, a3, 32-WSBITS + src a2, a3, a2 # a2 = xxwww1yyxxxwww1yy...... wsr a2, windowstart # set corrected windowstart - rsr a3, excsave1 - l32i a2, a3, EXC_TABLE_DOUBLE_SAVE # restore a2 - l32i a3, a3, EXC_TABLE_PARAM # original WB (in user task) + srli a3, a3, 1 + rsr a2, excsave1 + l32i a2, a2, EXC_TABLE_DOUBLE_SAVE # restore a2 + xsr a2, excsave1 + s32i a3, a2, EXC_TABLE_DOUBLE_SAVE # save a3 + l32i a3, a2, EXC_TABLE_PARAM # original WB (in user task) + xsr a2, excsave1 /* Return to the original (user task) WINDOWBASE. * We leave the following frame behind: * a0, a1, a2 same - * a3: trashed (saved in excsave_1) + * a3: trashed (saved in EXC_TABLE_DOUBLE_SAVE) * depc: depc (we have to return to that address) - * excsave_1: a3 + * excsave_1: exctable */ wsr a3, windowbase @@ -1159,9 +1163,9 @@ fast_syscall_spill_registers_fixup: * a0: return address * a1: used, stack pointer * a2: kernel stack pointer - * a3: available, saved in EXCSAVE_1 + * a3: available * depc: exception address - * excsave: a3 + * excsave: exctable * Note: This frame might be the same as above. */ @@ -1181,9 +1185,12 @@ fast_syscall_spill_registers_fixup: rsr a0, exccause addx4 a0, a0, a3 # find entry in table l32i a0, a0, EXC_TABLE_FAST_USER # load handler + l32i a3, a3, EXC_TABLE_DOUBLE_SAVE jx a0 -fast_syscall_spill_registers_fixup_return: +ENDPROC(fast_syscall_spill_registers_fixup) + +ENTRY(fast_syscall_spill_registers_fixup_return) /* When we return here, all registers have been restored (a2: DEPC) */ @@ -1191,13 +1198,13 @@ fast_syscall_spill_registers_fixup_return: /* Restore fixup handler. */ - xsr a3, excsave1 - movi a2, fast_syscall_spill_registers_fixup - s32i a2, a3, EXC_TABLE_FIXUP - s32i a0, a3, EXC_TABLE_DOUBLE_SAVE - rsr a2, windowbase - s32i a2, a3, EXC_TABLE_PARAM - l32i a2, a3, EXC_TABLE_KSTK + rsr a2, excsave1 + s32i a3, a2, EXC_TABLE_DOUBLE_SAVE + movi a3, fast_syscall_spill_registers_fixup + s32i a3, a2, EXC_TABLE_FIXUP + rsr a3, windowbase + s32i a3, a2, EXC_TABLE_PARAM + l32i a2, a2, EXC_TABLE_KSTK /* Load WB at the time the exception occurred. */ @@ -1206,8 +1213,12 @@ fast_syscall_spill_registers_fixup_return: wsr a3, windowbase rsync + rsr a3, excsave1 + l32i a3, a3, EXC_TABLE_DOUBLE_SAVE + rfde +ENDPROC(fast_syscall_spill_registers_fixup_return) /* * spill all registers. diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 718eca1850bd..98b67d5f1514 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -341,7 +341,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sp = regs->areg[1]; - if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! on_sig_stack(sp)) { + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && sas_ss_flags(sp) == 0) { sp = current->sas_ss_sp + current->sas_ss_size; } diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 56f88b7afe2f..e9e1aad8c271 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -737,7 +737,8 @@ static int __init iss_net_setup(char *str) return 1; } - if ((new = alloc_bootmem(sizeof new)) == NULL) { + new = alloc_bootmem(sizeof(*new)); + if (new == NULL) { printk("Alloc_bootmem failed\n"); return 1; } diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c index 51410c2ac2cb..4d978a3c88f7 100644 --- a/drivers/clk/clk-nomadik.c +++ b/drivers/clk/clk-nomadik.c @@ -27,6 +27,14 @@ */ #define SRC_CR 0x00U +#define SRC_CR_T0_ENSEL BIT(15) +#define SRC_CR_T1_ENSEL BIT(17) +#define SRC_CR_T2_ENSEL BIT(19) +#define SRC_CR_T3_ENSEL BIT(21) +#define SRC_CR_T4_ENSEL BIT(23) +#define SRC_CR_T5_ENSEL BIT(25) +#define SRC_CR_T6_ENSEL BIT(27) +#define SRC_CR_T7_ENSEL BIT(29) #define SRC_XTALCR 0x0CU #define SRC_XTALCR_XTALTIMEN BIT(20) #define SRC_XTALCR_SXTALDIS BIT(19) @@ -543,6 +551,19 @@ void __init nomadik_clk_init(void) __func__, np->name); return; } + + /* Set all timers to use the 2.4 MHz TIMCLK */ + val = readl(src_base + SRC_CR); + val |= SRC_CR_T0_ENSEL; + val |= SRC_CR_T1_ENSEL; + val |= SRC_CR_T2_ENSEL; + val |= SRC_CR_T3_ENSEL; + val |= SRC_CR_T4_ENSEL; + val |= SRC_CR_T5_ENSEL; + val |= SRC_CR_T6_ENSEL; + val |= SRC_CR_T7_ENSEL; + writel(val, src_base + SRC_CR); + val = readl(src_base + SRC_XTALCR); pr_info("SXTALO is %s\n", (val & SRC_XTALCR_SXTALDIS) ? "disabled" : "enabled"); diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index fc777bdc1886..81a202d12a7a 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -39,8 +39,8 @@ static const struct coreclk_ratio a370_coreclk_ratios[] __initconst = { }; static const u32 a370_tclk_freqs[] __initconst = { - 16600000, - 20000000, + 166000000, + 200000000, }; static u32 __init a370_get_tclk_freq(void __iomem *sar) diff --git a/drivers/clk/socfpga/clk.c b/drivers/clk/socfpga/clk.c index 5bb848cac6ec..81dd31a686df 100644 --- a/drivers/clk/socfpga/clk.c +++ b/drivers/clk/socfpga/clk.c @@ -49,7 +49,7 @@ #define SOCFPGA_L4_SP_CLK "l4_sp_clk" #define SOCFPGA_NAND_CLK "nand_clk" #define SOCFPGA_NAND_X_CLK "nand_x_clk" -#define SOCFPGA_MMC_CLK "mmc_clk" +#define SOCFPGA_MMC_CLK "sdmmc_clk" #define SOCFPGA_DB_CLK "gpio_db_clk" #define div_mask(width) ((1 << (width)) - 1) diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index 67ccf4aa7277..f5e4c21b301f 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -107,7 +107,7 @@ static int icst_set_rate(struct clk_hw *hw, unsigned long rate, vco = icst_hz_to_vco(icst->params, rate); icst->rate = icst_hz(icst->params, vco); - vco_set(icst->vcoreg, icst->lockreg, vco); + vco_set(icst->lockreg, icst->vcoreg, vco); return 0; } diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d2c3253e015e..506fd23c7550 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -986,12 +986,12 @@ static int __init acpi_cpufreq_init(void) { int ret; + if (acpi_disabled) + return -ENODEV; + /* don't keep reloading if cpufreq_driver exists */ if (cpufreq_get_current_driver()) - return 0; - - if (acpi_disabled) - return 0; + return -EEXIST; pr_debug("acpi_cpufreq_init\n"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index badf6206b2b2..eb3fdc755000 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,7 +48,7 @@ static inline int32_t div_fp(int32_t x, int32_t y) } struct sample { - int core_pct_busy; + int32_t core_pct_busy; u64 aperf; u64 mperf; int freq; @@ -68,7 +68,7 @@ struct _pid { int32_t i_gain; int32_t d_gain; int deadband; - int last_err; + int32_t last_err; }; struct cpudata { @@ -153,16 +153,15 @@ static inline void pid_d_gain_set(struct _pid *pid, int percent) pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); } -static signed int pid_calc(struct _pid *pid, int busy) +static signed int pid_calc(struct _pid *pid, int32_t busy) { - signed int err, result; + signed int result; int32_t pterm, dterm, fp_error; int32_t integral_limit; - err = pid->setpoint - busy; - fp_error = int_tofp(err); + fp_error = int_tofp(pid->setpoint) - busy; - if (abs(err) <= pid->deadband) + if (abs(fp_error) <= int_tofp(pid->deadband)) return 0; pterm = mul_fp(pid->p_gain, fp_error); @@ -176,8 +175,8 @@ static signed int pid_calc(struct _pid *pid, int busy) if (pid->integral < -integral_limit) pid->integral = -integral_limit; - dterm = mul_fp(pid->d_gain, (err - pid->last_err)); - pid->last_err = err; + dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); + pid->last_err = fp_error; result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; @@ -367,12 +366,13 @@ static int intel_pstate_turbo_pstate(void) static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; + int max_perf_adj; int min_perf; if (limits.no_turbo) max_perf = cpu->pstate.max_pstate; - max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); - *max = clamp_t(int, max_perf, + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); + *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); @@ -436,8 +436,9 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - core_pct = div64_u64(sample->aperf * 100, sample->mperf); - sample->freq = cpu->pstate.max_pstate * core_pct * 1000; + core_pct = div64_u64(int_tofp(sample->aperf * 100), + sample->mperf); + sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); sample->core_pct_busy = core_pct; } @@ -469,22 +470,19 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, jiffies + delay); } -static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) +static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) { - int32_t busy_scaled; int32_t core_busy, max_pstate, current_pstate; - core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); + core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); - - return fp_toint(busy_scaled); + return mul_fp(core_busy, div_fp(max_pstate, current_pstate)); } static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { - int busy_scaled; + int32_t busy_scaled; struct _pid *pid; signed int ctl = 0; int steps; diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 3519111c566b..10b577fcf48d 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -305,6 +305,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( edma_alloc_slot(EDMA_CTLR(echan->ch_num), EDMA_SLOT_ANY); if (echan->slot[i] < 0) { + kfree(edesc); dev_err(dev, "Failed to allocate slot\n"); kfree(edesc); return NULL; @@ -346,6 +347,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( ccnt = sg_dma_len(sg) / (acnt * bcnt); if (ccnt > (SZ_64K - 1)) { dev_err(dev, "Exceeded max SG segment size\n"); + kfree(edesc); return NULL; } cidx = acnt * bcnt; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 05ad9ba0a67e..fe58d0833a11 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -61,7 +61,7 @@ static int drm_version(struct drm_device *dev, void *data, /** Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { - DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index ea9022ef15d5..10d1de5bce6f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -83,8 +83,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, return true; } -static void intel_crt_get_config(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; struct intel_crt *crt = intel_encoder_to_crt(encoder); @@ -102,7 +101,25 @@ static void intel_crt_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_NVSYNC; - pipe_config->adjusted_mode.flags |= flags; + return flags; +} + +static void intel_crt_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); +} + +static void hsw_crt_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + intel_ddi_get_config(encoder, pipe_config); + + pipe_config->adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NHSYNC | + DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NVSYNC); + pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); } /* Note: The caller is required to filter out dpms modes not supported by the @@ -799,7 +816,10 @@ void intel_crt_init(struct drm_device *dev) crt->base.mode_set = intel_crt_mode_set; crt->base.disable = intel_disable_crt; crt->base.enable = intel_enable_crt; - crt->base.get_config = intel_crt_get_config; + if (IS_HASWELL(dev)) + crt->base.get_config = hsw_crt_get_config; + else + crt->base.get_config = intel_crt_get_config; if (I915_HAS_HOTPLUG(dev)) crt->base.hpd_pin = HPD_CRT; if (HAS_DDI(dev)) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 63de2701b974..b53fff84a7d5 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1249,8 +1249,8 @@ static void intel_ddi_hot_plug(struct intel_encoder *intel_encoder) intel_dp_check_link_status(intel_dp); } -static void intel_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +void intel_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); @@ -1268,6 +1268,23 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_NVSYNC; pipe_config->adjusted_mode.flags |= flags; + + switch (temp & TRANS_DDI_BPC_MASK) { + case TRANS_DDI_BPC_6: + pipe_config->pipe_bpp = 18; + break; + case TRANS_DDI_BPC_8: + pipe_config->pipe_bpp = 24; + break; + case TRANS_DDI_BPC_10: + pipe_config->pipe_bpp = 30; + break; + case TRANS_DDI_BPC_12: + pipe_config->pipe_bpp = 36; + break; + default: + break; + } } static void intel_ddi_destroy(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 581fb4b2f766..d78d33f9337d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2327,9 +2327,10 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) FDI_FE_ERRC_ENABLE); } -static bool pipe_has_enabled_pch(struct intel_crtc *intel_crtc) +static bool pipe_has_enabled_pch(struct intel_crtc *crtc) { - return intel_crtc->base.enabled && intel_crtc->config.has_pch_encoder; + return crtc->base.enabled && crtc->active && + crtc->config.has_pch_encoder; } static void ivb_modeset_global_resources(struct drm_device *dev) @@ -2979,6 +2980,48 @@ static void ironlake_pch_transcoder_set_timings(struct intel_crtc *crtc, I915_READ(VSYNCSHIFT(cpu_transcoder))); } +static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t temp; + + temp = I915_READ(SOUTH_CHICKEN1); + if (temp & FDI_BC_BIFURCATION_SELECT) + return; + + WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); + WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); + + temp |= FDI_BC_BIFURCATION_SELECT; + DRM_DEBUG_KMS("enabling fdi C rx\n"); + I915_WRITE(SOUTH_CHICKEN1, temp); + POSTING_READ(SOUTH_CHICKEN1); +} + +static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) +{ + struct drm_device *dev = intel_crtc->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + switch (intel_crtc->pipe) { + case PIPE_A: + break; + case PIPE_B: + if (intel_crtc->config.fdi_lanes > 2) + WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT); + else + cpt_enable_fdi_bc_bifurcation(dev); + + break; + case PIPE_C: + cpt_enable_fdi_bc_bifurcation(dev); + + break; + default: + BUG(); + } +} + /* * Enable PCH resources required for PCH ports: * - PCH PLLs @@ -2997,6 +3040,9 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) assert_pch_transcoder_disabled(dev_priv, pipe); + if (IS_IVYBRIDGE(dev)) + ivybridge_update_fdi_bc_bifurcation(intel_crtc); + /* Write the TU size bits before fdi link training, so that error * detection works. */ I915_WRITE(FDI_RX_TUSIZE1(pipe), @@ -4983,6 +5029,22 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, if (!(tmp & PIPECONF_ENABLE)) return false; + if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) { + switch (tmp & PIPECONF_BPC_MASK) { + case PIPECONF_6BPC: + pipe_config->pipe_bpp = 18; + break; + case PIPECONF_8BPC: + pipe_config->pipe_bpp = 24; + break; + case PIPECONF_10BPC: + pipe_config->pipe_bpp = 30; + break; + default: + break; + } + } + intel_get_pipe_timings(crtc, pipe_config); i9xx_get_pfit_config(crtc, pipe_config); @@ -5576,48 +5638,6 @@ static bool ironlake_compute_clocks(struct drm_crtc *crtc, return true; } -static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t temp; - - temp = I915_READ(SOUTH_CHICKEN1); - if (temp & FDI_BC_BIFURCATION_SELECT) - return; - - WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); - WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); - - temp |= FDI_BC_BIFURCATION_SELECT; - DRM_DEBUG_KMS("enabling fdi C rx\n"); - I915_WRITE(SOUTH_CHICKEN1, temp); - POSTING_READ(SOUTH_CHICKEN1); -} - -static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) -{ - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - switch (intel_crtc->pipe) { - case PIPE_A: - break; - case PIPE_B: - if (intel_crtc->config.fdi_lanes > 2) - WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT); - else - cpt_enable_fdi_bc_bifurcation(dev); - - break; - case PIPE_C: - cpt_enable_fdi_bc_bifurcation(dev); - - break; - default: - BUG(); - } -} - int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) { /* @@ -5811,9 +5831,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, &intel_crtc->config.fdi_m_n); } - if (IS_IVYBRIDGE(dev)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); - ironlake_set_pipeconf(crtc); /* Set up the display plane register */ @@ -5881,6 +5898,23 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (!(tmp & PIPECONF_ENABLE)) return false; + switch (tmp & PIPECONF_BPC_MASK) { + case PIPECONF_6BPC: + pipe_config->pipe_bpp = 18; + break; + case PIPECONF_8BPC: + pipe_config->pipe_bpp = 24; + break; + case PIPECONF_10BPC: + pipe_config->pipe_bpp = 30; + break; + case PIPECONF_12BPC: + pipe_config->pipe_bpp = 36; + break; + default: + break; + } + if (I915_READ(PCH_TRANSCONF(crtc->pipe)) & TRANS_ENABLE) { struct intel_shared_dpll *pll; @@ -8612,6 +8646,9 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_X(dpll_hw_state.fp0); PIPE_CONF_CHECK_X(dpll_hw_state.fp1); + if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) + PIPE_CONF_CHECK_I(pipe_bpp); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_FLAGS diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2c555f91bfae..1a431377d83b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1401,6 +1401,26 @@ static void intel_dp_get_config(struct intel_encoder *encoder, else pipe_config->port_clock = 270000; } + + if (is_edp(intel_dp) && dev_priv->vbt.edp_bpp && + pipe_config->pipe_bpp > dev_priv->vbt.edp_bpp) { + /* + * This is a big fat ugly hack. + * + * Some machines in UEFI boot mode provide us a VBT that has 18 + * bpp and 1.62 GHz link bandwidth for eDP, which for reasons + * unknown we fail to light up. Yet the same BIOS boots up with + * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as + * max, not what it tells us to use. + * + * Note: This will still be broken if the eDP panel is not lit + * up by the BIOS, and thus we can't get the mode at module + * load. + */ + DRM_DEBUG_KMS("pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", + pipe_config->pipe_bpp, dev_priv->vbt.edp_bpp); + dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp; + } } static bool is_edp_psr(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9b7b68fd5d47..7f2b384ac939 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -765,6 +765,8 @@ extern void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder); extern bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); extern void intel_ddi_fdi_disable(struct drm_crtc *crtc); +extern void intel_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config); extern void intel_display_handle_reset(struct drm_device *dev); extern bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 831a5c021c4b..b8af94a5be39 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -700,6 +700,22 @@ static const struct dmi_system_id intel_no_lvds[] = { }, { .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D410PT", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_MATCH(DMI_BOARD_NAME, "D410PT"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D425KT", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "D425KT"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, .ident = "Intel D510MO", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index fe1de855775e..57fcc4b16a52 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -291,6 +291,7 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */ WREG32(HDMI_ACR_PACKET_CONTROL + offset, + HDMI_ACR_SOURCE | /* select SW CTS value */ HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ evergreen_hdmi_update_ACR(encoder, mode->clock); diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 71399065db04..b41905573cd2 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2635,7 +2635,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_sclk_ds = true; pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; - pi->bapm_enable = true; + pi->bapm_enable = false; pi->voltage_drop_t = 0; pi->caps_sclk_throttle_low_notification = false; pi->caps_fps = false; /* true? */ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a400ac1c4147..24f4960f59ee 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1272,8 +1272,8 @@ struct radeon_blacklist_clocks struct radeon_clock_and_voltage_limits { u32 sclk; u32 mclk; - u32 vddc; - u32 vddci; + u16 vddc; + u16 vddci; }; struct radeon_clock_array { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f0612645de99..7567437dbd34 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -177,18 +177,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; - props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; + props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); - props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; - props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3591855cc5b5..6df23502059a 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -594,7 +594,7 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - if (device->use_frwr) + if (device && device->use_frwr) isert_conn_free_frwr_pool(isert_conn); if (isert_conn->conn_qp) { diff --git a/drivers/input/input.c b/drivers/input/input.c index c04469928925..e75d015024a1 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1734,6 +1734,7 @@ EXPORT_SYMBOL_GPL(input_class); */ struct input_dev *input_allocate_device(void) { + static atomic_t input_no = ATOMIC_INIT(0); struct input_dev *dev; dev = kzalloc(sizeof(struct input_dev), GFP_KERNEL); @@ -1743,9 +1744,13 @@ struct input_dev *input_allocate_device(void) device_initialize(&dev->dev); mutex_init(&dev->mutex); spin_lock_init(&dev->event_lock); + init_timer(&dev->timer); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); + dev_set_name(&dev->dev, "input%ld", + (unsigned long) atomic_inc_return(&input_no) - 1); + __module_get(THIS_MODULE); } @@ -2019,7 +2024,6 @@ static void devm_input_device_unregister(struct device *dev, void *res) */ int input_register_device(struct input_dev *dev) { - static atomic_t input_no = ATOMIC_INIT(0); struct input_devres *devres = NULL; struct input_handler *handler; unsigned int packet_size; @@ -2059,7 +2063,6 @@ int input_register_device(struct input_dev *dev) * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. */ - init_timer(&dev->timer); if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) { dev->timer.data = (long) dev; dev->timer.function = input_repeat_key; @@ -2073,9 +2076,6 @@ int input_register_device(struct input_dev *dev) if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; - dev_set_name(&dev->dev, "input%ld", - (unsigned long) atomic_inc_return(&input_no) - 1); - error = device_add(&dev->dev); if (error) goto err_free_vals; diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 134c3b404a54..a2e758d27584 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -786,10 +786,17 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); - if (pdata) + if (pdata) { error = pxa27x_keypad_build_keycode(keypad); - else + } else { error = pxa27x_keypad_build_keycode_from_dt(keypad); + /* + * Data that we get from DT resides in dynamically + * allocated memory so we need to update our pdata + * pointer. + */ + pdata = keypad->pdata; + } if (error) { dev_err(&pdev->dev, "failed to build keycode\n"); goto failed_put_clk; diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 082684e7f390..9365535ba7f1 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -351,7 +351,9 @@ static void cm109_urb_irq_callback(struct urb *urb) if (status) { if (status == -ESHUTDOWN) return; - dev_err(&dev->intf->dev, "%s: urb status %d\n", __func__, status); + dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", + __func__, status); + goto out; } /* Special keys */ @@ -418,8 +420,12 @@ static void cm109_urb_ctl_callback(struct urb *urb) dev->ctl_data->byte[2], dev->ctl_data->byte[3]); - if (status) - dev_err(&dev->intf->dev, "%s: urb status %d\n", __func__, status); + if (status) { + if (status == -ESHUTDOWN) + return; + dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", + __func__, status); + } spin_lock(&dev->ctl_submit_lock); @@ -427,7 +433,7 @@ static void cm109_urb_ctl_callback(struct urb *urb) if (likely(!dev->shutdown)) { - if (dev->buzzer_pending) { + if (dev->buzzer_pending || status) { dev->buzzer_pending = 0; dev->ctl_urb_pending = 1; cm109_submit_buzz_toggle(dev); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 7c5d72a6a26a..83658472ad25 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -103,6 +103,7 @@ static const struct alps_model_info alps_model_data[] = { /* Dell Latitude E5500, E6400, E6500, Precision M4400 */ { { 0x62, 0x02, 0x14 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_PASS | ALPS_DUALPOINT | ALPS_PS2_INTERLEAVED }, + { { 0x73, 0x00, 0x14 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_DUALPOINT }, /* Dell XT2 */ { { 0x73, 0x02, 0x50 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_FOUR_BUTTONS }, /* Dell Vostro 1400 */ { { 0x52, 0x01, 0x14 }, 0x00, ALPS_PROTO_V2, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT | ALPS_PS2_INTERLEAVED }, /* Toshiba Tecra A11-11L */ diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 78e4de42efaa..52c9ebf94729 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -223,21 +223,26 @@ static int i8042_flush(void) { unsigned long flags; unsigned char data, str; - int i = 0; + int count = 0; + int retval = 0; spin_lock_irqsave(&i8042_lock, flags); - while (((str = i8042_read_status()) & I8042_STR_OBF) && (i < I8042_BUFFER_SIZE)) { - udelay(50); - data = i8042_read_data(); - i++; - dbg("%02x <- i8042 (flush, %s)\n", - data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); + while ((str = i8042_read_status()) & I8042_STR_OBF) { + if (count++ < I8042_BUFFER_SIZE) { + udelay(50); + data = i8042_read_data(); + dbg("%02x <- i8042 (flush, %s)\n", + data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); + } else { + retval = -EIO; + break; + } } spin_unlock_irqrestore(&i8042_lock, flags); - return i; + return retval; } /* @@ -849,7 +854,7 @@ static int __init i8042_check_aux(void) static int i8042_controller_check(void) { - if (i8042_flush() == I8042_BUFFER_SIZE) { + if (i8042_flush()) { pr_err("No controller found\n"); return -ENODEV; } diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 79b69ea47f74..e53416a4d7f3 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -1031,6 +1031,7 @@ static void wacom_destroy_leds(struct wacom *wacom) } static enum power_supply_property wacom_battery_props[] = { + POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CAPACITY }; @@ -1042,6 +1043,9 @@ static int wacom_battery_get_property(struct power_supply *psy, int ret = 0; switch (psp) { + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_DEVICE; + break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = wacom->wacom_wac.battery_capacity * 100 / 31; diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index b2aa503c16b1..c59b797eeafa 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2054,6 +2054,12 @@ static const struct wacom_features wacom_features_0x101 = static const struct wacom_features wacom_features_0x10D = { "Wacom ISDv4 10D", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x10E = + { "Wacom ISDv4 10E", WACOM_PKGLEN_MTTPC, 27760, 15694, 255, + 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x10F = + { "Wacom ISDv4 10F", WACOM_PKGLEN_MTTPC, 27760, 15694, 255, + 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x4001 = { "Wacom ISDv4 4001", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2248,6 +2254,8 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, { USB_DEVICE_WACOM(0x10D) }, + { USB_DEVICE_WACOM(0x10E) }, + { USB_DEVICE_WACOM(0x10F) }, { USB_DEVICE_WACOM(0x300) }, { USB_DEVICE_WACOM(0x301) }, { USB_DEVICE_WACOM(0x304) }, diff --git a/drivers/md/md.c b/drivers/md/md.c index adf4d7e1d5e1..561a65f82e26 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8111,6 +8111,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, u64 *p; int lo, hi; int rv = 1; + unsigned long flags; if (bb->shift < 0) /* badblocks are disabled */ @@ -8125,7 +8126,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, sectors = next - s; } - write_seqlock_irq(&bb->lock); + write_seqlock_irqsave(&bb->lock, flags); p = bb->page; lo = 0; @@ -8241,7 +8242,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, bb->changed = 1; if (!acknowledged) bb->unacked_exist = 1; - write_sequnlock_irq(&bb->lock); + write_sequnlock_irqrestore(&bb->lock, flags); return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d60412c7f995..aacf6bf352d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1479,6 +1479,7 @@ static int raid1_spare_active(struct mddev *mddev) } } if (rdev + && rdev->recovery_offset == MaxSector && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { count++; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index df7b0a06b0ea..73dc8a377522 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1782,6 +1782,7 @@ static int raid10_spare_active(struct mddev *mddev) } sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); } else if (tmp->rdev + && tmp->rdev->recovery_offset == MaxSector && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { count++; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ff4f252ca1a..f8b906843926 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -778,6 +778,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; + /* + * If this is discard request, set bi_vcnt 0. We don't + * want to confuse SCSI because SCSI will replace payload + */ + if (rw & REQ_DISCARD) + bi->bi_vcnt = 0; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); @@ -816,6 +822,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; + /* + * If this is discard request, set bi_vcnt 0. We don't + * want to confuse SCSI because SCSI will replace payload + */ + if (rw & REQ_DISCARD) + rbi->bi_vcnt = 0; if (conf->mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), rbi, disk_devt(conf->mddev->gendisk), @@ -2910,6 +2922,14 @@ static void handle_stripe_clean_event(struct r5conf *conf, } /* now that discard is done we can proceed with any sync */ clear_bit(STRIPE_DISCARD, &sh->state); + /* + * SCSI discard will change some bio fields and the stripe has + * no updated data, so remove it from hash list and the stripe + * will be reinitialized + */ + spin_lock_irq(&conf->device_lock); + remove_hash(sh); + spin_unlock_irq(&conf->device_lock); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) set_bit(STRIPE_HANDLE, &sh->state); diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 59ab0692f0b9..a9830ff8e3f3 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -349,7 +349,7 @@ static int legacy_set_geometry(struct gpmi_nand_data *this) int common_nfc_set_geometry(struct gpmi_nand_data *this) { - return set_geometry_by_ecc_info(this) ? 0 : legacy_set_geometry(this); + return legacy_set_geometry(this); } struct dma_chan *get_dma_chan(struct gpmi_nand_data *this) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index dd03dfdfb0d6..c28d4e29af1a 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1320,7 +1320,12 @@ static int pxa3xx_nand_probe(struct platform_device *pdev) for (cs = 0; cs < pdata->num_cs; cs++) { struct mtd_info *mtd = info->host[cs]->mtd; - mtd->name = pdev->name; + /* + * The mtd name matches the one used in 'mtdparts' kernel + * parameter. This name cannot be changed or otherwise + * user's mtd partitions configuration would get broken. + */ + mtd->name = "pxa3xx_nand-0"; info->cs = cs; ret = pxa3xx_nand_scan(mtd); if (ret) { diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 7661261de2f0..40e7b1cb4aea 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -82,8 +82,8 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev, static size_t bond_get_size(const struct net_device *bond_dev) { - return nla_total_size(sizeof(u8)); /* IFLA_BOND_MODE */ - + nla_total_size(sizeof(u32)); /* IFLA_BOND_ACTIVE_SLAVE */ + return nla_total_size(sizeof(u8)) + /* IFLA_BOND_MODE */ + nla_total_size(sizeof(u32)); /* IFLA_BOND_ACTIVE_SLAVE */ } static int bond_fill_info(struct sk_buff *skb, diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index a668cd491cb3..e3fc07cf2f62 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -814,9 +814,6 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) msg_ctrl_save = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, 0)); - if (msg_ctrl_save & IF_MCONT_EOB) - return num_rx_pkts; - if (msg_ctrl_save & IF_MCONT_MSGLST) { c_can_handle_lost_msg_obj(dev, 0, msg_obj); num_rx_pkts++; @@ -824,6 +821,9 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) continue; } + if (msg_ctrl_save & IF_MCONT_EOB) + return num_rx_pkts; + if (!(msg_ctrl_save & IF_MCONT_NEWDAT)) continue; diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 3b9546588240..4b2d5ed62b11 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1544,9 +1544,9 @@ static int kvaser_usb_init_one(struct usb_interface *intf, return 0; } -static void kvaser_usb_get_endpoints(const struct usb_interface *intf, - struct usb_endpoint_descriptor **in, - struct usb_endpoint_descriptor **out) +static int kvaser_usb_get_endpoints(const struct usb_interface *intf, + struct usb_endpoint_descriptor **in, + struct usb_endpoint_descriptor **out) { const struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; @@ -1557,12 +1557,18 @@ static void kvaser_usb_get_endpoints(const struct usb_interface *intf, for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endpoint)) + if (!*in && usb_endpoint_is_bulk_in(endpoint)) *in = endpoint; - if (usb_endpoint_is_bulk_out(endpoint)) + if (!*out && usb_endpoint_is_bulk_out(endpoint)) *out = endpoint; + + /* use first bulk endpoint for in and out */ + if (*in && *out) + return 0; } + + return -ENODEV; } static int kvaser_usb_probe(struct usb_interface *intf, @@ -1576,8 +1582,8 @@ static int kvaser_usb_probe(struct usb_interface *intf, if (!dev) return -ENOMEM; - kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out); - if (!dev->bulk_in || !dev->bulk_out) { + err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out); + if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; } diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index a98778e3af84..e2aa09ce6af7 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -252,25 +252,33 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, struct bgmac_slot_info *slot) { struct device *dma_dev = bgmac->core->dma_dev; + struct sk_buff *skb; + dma_addr_t dma_addr; struct bgmac_rx_header *rx; /* Alloc skb */ - slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); - if (!slot->skb) + skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); + if (!skb) return -ENOMEM; /* Poison - if everything goes fine, hardware will overwrite it */ - rx = (struct bgmac_rx_header *)slot->skb->data; + rx = (struct bgmac_rx_header *)skb->data; rx->len = cpu_to_le16(0xdead); rx->flags = cpu_to_le16(0xbeef); /* Map skb for the DMA */ - slot->dma_addr = dma_map_single(dma_dev, slot->skb->data, - BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_dev, slot->dma_addr)) { + dma_addr = dma_map_single(dma_dev, skb->data, + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { bgmac_err(bgmac, "DMA mapping error\n"); + dev_kfree_skb(skb); return -ENOMEM; } + + /* Update the slot */ + slot->skb = skb; + slot->dma_addr = dma_addr; + if (slot->dma_addr & 0xC0000000) bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 6e46cff5236d..dcafbda3e5be 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2545,10 +2545,6 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } } - /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) - LOAD_ERROR_EXIT(bp, load_error0); - /* need to be done after alloc mem, since it's self adjusting to amount * of memory available for RSS queues */ @@ -2558,6 +2554,10 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) LOAD_ERROR_EXIT(bp, load_error0); } + /* Allocated memory for FW statistics */ + if (bnx2x_alloc_fw_stats_mem(bp)) + LOAD_ERROR_EXIT(bp, load_error0); + /* request pf to initialize status blocks */ if (IS_VF(bp)) { rc = bnx2x_vfpf_init(bp); @@ -2812,8 +2812,8 @@ load_error1: if (IS_PF(bp)) bnx2x_clear_pf_load(bp); load_error0: - bnx2x_free_fp_mem(bp); bnx2x_free_fw_stats_mem(bp); + bnx2x_free_fp_mem(bp); bnx2x_free_mem(bp); return rc; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 71fffad94aff..0216d592d0ce 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2018,6 +2018,8 @@ failed: void bnx2x_iov_remove_one(struct bnx2x *bp) { + int vf_idx; + /* if SRIOV is not enabled there's nothing to do */ if (!IS_SRIOV(bp)) return; @@ -2026,6 +2028,18 @@ void bnx2x_iov_remove_one(struct bnx2x *bp) pci_disable_sriov(bp->pdev); DP(BNX2X_MSG_IOV, "sriov disabled\n"); + /* disable access to all VFs */ + for (vf_idx = 0; vf_idx < bp->vfdb->sriov.total; vf_idx++) { + bnx2x_pretend_func(bp, + HW_VF_HANDLE(bp, + bp->vfdb->sriov.first_vf_in_pf + + vf_idx)); + DP(BNX2X_MSG_IOV, "disabling internal access for vf %d\n", + bp->vfdb->sriov.first_vf_in_pf + vf_idx); + bnx2x_vf_enable_internal(bp, 0); + bnx2x_pretend_func(bp, BP_ABS_FUNC(bp)); + } + /* free vf database */ __bnx2x_iov_free_vfdb(bp); } @@ -3197,7 +3211,7 @@ int bnx2x_enable_sriov(struct bnx2x *bp) * the "acquire" messages to appear on the VF PF channel. */ DP(BNX2X_MSG_IOV, "about to call enable sriov\n"); - pci_disable_sriov(bp->pdev); + bnx2x_disable_sriov(bp); rc = pci_enable_sriov(bp->pdev, req_vfs); if (rc) { BNX2X_ERR("pci_enable_sriov failed with %d\n", rc); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 9c89dc8fe105..632b318eb38a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -1599,7 +1599,8 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb->tail - skb->transport_header, + skb_tail_pointer(skb) - + skb_transport_header(skb), adap->pdev); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 2c88ac295eea..f4825db5d179 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -840,6 +840,16 @@ int be_load_fw(struct be_adapter *adapter, u8 *func); bool be_is_wol_supported(struct be_adapter *adapter); bool be_pause_supported(struct be_adapter *adapter); u32 be_get_fw_log_level(struct be_adapter *adapter); + +static inline int fw_major_num(const char *fw_ver) +{ + int fw_major = 0; + + sscanf(fw_ver, "%d.", &fw_major); + + return fw_major; +} + int be_update_queues(struct be_adapter *adapter); int be_poll(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 8080a1a5cee7..741d3bff5ae7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3379,6 +3379,12 @@ static int be_setup(struct be_adapter *adapter) be_cmd_get_fw_ver(adapter, adapter->fw_ver, adapter->fw_on_flash); + if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) { + dev_err(dev, "Firmware on card is old(%s), IRQs may not work.", + adapter->fw_ver); + dev_err(dev, "Please upgrade firmware to version >= 4.0\n"); + } + if (adapter->vlans_added) be_vid_config(adapter); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index dac564c25440..e7847510eda2 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -263,7 +263,9 @@ static inline void mal_schedule_poll(struct mal_instance *mal) { if (likely(napi_schedule_prep(&mal->napi))) { MAL_DBG2(mal, "schedule_poll" NL); + spin_lock(&mal->lock); mal_disable_eob_irq(mal); + spin_unlock(&mal->lock); __napi_schedule(&mal->napi); } else MAL_DBG2(mal, "already in poll" NL); @@ -442,15 +444,13 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (napi_reschedule(napi)) - mal_disable_eob_irq(mal); - else - MAL_DBG2(mal, "already in poll list" NL); - - if (budget > 0) - goto again; - else + if (!napi_reschedule(napi)) goto more_work; + + spin_lock_irqsave(&mal->lock, flags); + mal_disable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); + goto again; } mc->ops->poll_tx(mc->dev); } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 59ad007dd5aa..ad6800ad1bfc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3917,8 +3917,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, " next_to_watch <%x>\n" " jiffies <%lx>\n" " next_to_watch.status <%x>\n", - (unsigned long)((tx_ring - adapter->tx_ring) / - sizeof(struct e1000_tx_ring)), + (unsigned long)(tx_ring - adapter->tx_ring), readl(hw->hw_addr + tx_ring->tdh), readl(hw->hw_addr + tx_ring->tdt), tx_ring->next_to_use, diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 0ae3177416c7..b918ba3640f9 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -771,8 +771,10 @@ static int igb_set_eeprom(struct net_device *netdev, if (eeprom->len == 0) return -EOPNOTSUPP; - if (hw->mac.type == e1000_i211) + if ((hw->mac.type >= e1000_i210) && + !igb_get_flash_presence_i210(hw)) { return -EOPNOTSUPP; + } if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) return -EFAULT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index f51fd1f4fb49..09149143ee0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -67,7 +67,11 @@ #define IXGBE_MAX_TXD 4096 #define IXGBE_MIN_TXD 64 +#if (PAGE_SIZE < 8192) #define IXGBE_DEFAULT_RXD 512 +#else +#define IXGBE_DEFAULT_RXD 128 +#endif #define IXGBE_MAX_RXD 4096 #define IXGBE_MIN_RXD 64 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a7d1a1c43f12..5191b3ca9a26 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3823,14 +3823,6 @@ void ixgbe_set_rx_mode(struct net_device *netdev) if (netdev->flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; vmolr |= IXGBE_VMOLR_MPE; - } else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - hw->mac.ops.update_mc_addr_list(hw, netdev); - vmolr |= IXGBE_VMOLR_ROMPE; } ixgbe_vlan_filter_enable(adapter); hw->addr_ctrl.user_set_promisc = false; @@ -3847,6 +3839,13 @@ void ixgbe_set_rx_mode(struct net_device *netdev) vmolr |= IXGBE_VMOLR_ROPE; } + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + hw->mac.ops.update_mc_addr_list(hw, netdev); + vmolr |= IXGBE_VMOLR_ROMPE; + if (adapter->num_vfs) ixgbe_restore_vf_multicasts(adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index acf38067a700..8971e2d0a984 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -286,9 +286,13 @@ static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector) ((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8) #define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG -#define IXGBE_DESC_UNUSED(R) \ - ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) +static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; +} #define IXGBEVF_RX_DESC(R, i) \ (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i])) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index b16b694951b8..038bfc8b7616 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -251,7 +251,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(count && netif_carrier_ok(tx_ring->netdev) && - (IXGBE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { + (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -497,15 +497,6 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_bytes += skb->len; total_rx_packets++; - /* - * Work around issue of some types of VM to VM loop back - * packets not getting split correctly - */ - if (staterr & IXGBE_RXD_STAT_LB) { - u32 header_fixup_len = skb_headlen(skb); - if (header_fixup_len < 14) - skb_push(skb, header_fixup_len); - } skb->protocol = eth_type_trans(skb, rx_ring->netdev); /* Workaround hardware that can't do proper VEPA multicast @@ -538,7 +529,7 @@ next_desc: } rx_ring->next_to_clean = i; - cleaned_count = IXGBE_DESC_UNUSED(rx_ring); + cleaned_count = ixgbevf_desc_unused(rx_ring); if (cleaned_count) ixgbevf_alloc_rx_buffers(adapter, rx_ring, cleaned_count); @@ -1389,7 +1380,7 @@ static void ixgbevf_configure(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbevf_ring *ring = &adapter->rx_ring[i]; ixgbevf_alloc_rx_buffers(adapter, ring, - IXGBE_DESC_UNUSED(ring)); + ixgbevf_desc_unused(ring)); } } @@ -3111,7 +3102,7 @@ static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) /* We need to check again in a case another CPU has just * made room available. */ - if (likely(IXGBE_DESC_UNUSED(tx_ring) < size)) + if (likely(ixgbevf_desc_unused(tx_ring) < size)) return -EBUSY; /* A reprieve! - use start_queue because it doesn't call schedule */ @@ -3122,7 +3113,7 @@ static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) { - if (likely(IXGBE_DESC_UNUSED(tx_ring) >= size)) + if (likely(ixgbevf_desc_unused(tx_ring) >= size)) return 0; return __ixgbevf_maybe_stop_tx(tx_ring, size); } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 735765c21c95..65d41b76fa2c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1687,11 +1687,11 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, - port, vp_oper->vlan_idx); + port, vp_oper->state.default_vlan); vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { - __mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx); + __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); vp_oper->mac_idx = NO_INDX; } } @@ -1718,6 +1718,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; + slave_state[slave].old_vlan_api = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 85d91665d400..b5554121aca4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -417,7 +417,6 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; - int idx; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -425,10 +424,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); - if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) - mlx4_unregister_vlan(mdev->dev, priv->port, idx); - else - en_dbg(HW, priv, "could not find vid %d in cache\n", vid); + mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c151e7a6710a..c3e70bc2d875 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -177,6 +177,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + struct mlx4_priv *priv = mlx4_priv(dev); u8 field; u32 size; int err = 0; @@ -185,18 +186,26 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_FMR_OFFSET 0x8 -#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10 -#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14 -#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x18 -#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x20 -#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 -#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 + #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 +#define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 @@ -237,8 +246,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); } else if (vhcr->op_modifier == 0) { - /* enable rdma and ethernet interfaces */ - field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); + /* enable rdma and ethernet interfaces, and new quota locations */ + field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | + QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = dev->caps.num_ports; @@ -250,14 +260,20 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); - size = dev->caps.num_qps; + size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + size = dev->caps.num_qps; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); - size = dev->caps.num_srqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + size = dev->caps.num_srqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); - size = dev->caps.num_cqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + size = dev->caps.num_cqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); size = dev->caps.num_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); @@ -265,14 +281,19 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - size = dev->caps.num_mpts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + size = dev->caps.num_mpts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); - size = dev->caps.num_mtts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + size = dev->caps.num_mtts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); } else err = -EINVAL; @@ -287,7 +308,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, u32 *outbox; u8 field, op_modifier; u32 size; - int err = 0; + int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ @@ -311,6 +332,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } func_cap->flags = field; + quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; @@ -318,29 +340,50 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + if (quotas) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + + } else { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); + func_cap->qp_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); + func_cap->srq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); + func_cap->cq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); + func_cap->mcg_quota = size & 0xFFFFFF; + } MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); func_cap->max_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 179d26709c94..7d2628dfdc29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -562,13 +562,17 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } dev->caps.num_ports = func_cap.num_ports; - dev->caps.num_qps = func_cap.qp_quota; - dev->caps.num_srqs = func_cap.srq_quota; - dev->caps.num_cqs = func_cap.cq_quota; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.num_mpts = func_cap.mpt_quota; - dev->caps.num_mtts = func_cap.mtt_quota; + dev->quotas.qp = func_cap.qp_quota; + dev->quotas.srq = func_cap.srq_quota; + dev->quotas.cq = func_cap.cq_quota; + dev->quotas.mpt = func_cap.mpt_quota; + dev->quotas.mtt = func_cap.mtt_quota; + dev->caps.num_qps = 1 << hca_param.log_num_qps; + dev->caps.num_srqs = 1 << hca_param.log_num_srqs; + dev->caps.num_cqs = 1 << hca_param.log_num_cqs; + dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; + dev->caps.num_eqs = func_cap.max_eq; + dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -2102,9 +2106,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) "aborting.\n"); return err; } - if (num_vfs > MLX4_MAX_NUM_VF) { - printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n", - num_vfs, MLX4_MAX_NUM_VF); + + /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS + * per port, we must limit the number of VFs to 63 (since their are + * 128 MACs) + */ + if (num_vfs >= MLX4_MAX_NUM_VF) { + dev_err(&pdev->dev, + "Requested more VF's (%d) than allowed (%d)\n", + num_vfs, MLX4_MAX_NUM_VF - 1); return -EINVAL; } @@ -2322,6 +2332,8 @@ slave_start: if (err) goto err_steer; + mlx4_init_quotas(dev); + for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 348bb8c7d9a7..e582a41a802b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -455,6 +455,7 @@ struct mlx4_slave_state { u8 last_cmd; u8 init_port_mask; bool active; + bool old_vlan_api; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; @@ -503,12 +504,28 @@ struct slave_list { struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE]; }; +struct resource_allocator { + spinlock_t alloc_lock; /* protect quotas */ + union { + int res_reserved; + int res_port_rsvd[MLX4_MAX_PORTS]; + }; + union { + int res_free; + int res_port_free[MLX4_MAX_PORTS]; + }; + int *quota; + int *allocated; + int *guaranteed; +}; + struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; + struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE]; }; #define SLAVE_EVENT_EQ_SIZE 128 @@ -1111,7 +1128,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); @@ -1252,4 +1269,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); +void mlx4_init_quotas(struct mlx4_dev *dev); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index f91719a08cba..63391a1a7f8c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -755,14 +755,14 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) struct mlx4_mr_table *mr_table = &priv->mr_table; int err; - if (!is_power_of_2(dev->caps.num_mpts)) - return -EINVAL; - /* Nothing to do for slaves - all MR handling is forwarded * to the master */ if (mlx4_is_slave(dev)) return 0; + if (!is_power_of_2(dev->caps.num_mpts)) + return -EINVAL; + err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, ~0, dev->caps.reserved_mrws, 0); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 946e0af5faef..caaa15470395 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -178,13 +178,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { u64 out_param = 0; - int err; + int err = -EINVAL; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + err = mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + if (err && err == -EINVAL && mlx4_is_slave(dev)) { + /* retry using old REG_MAC format */ + set_param_l(&out_param, port); + err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + dev->flags |= MLX4_FLAG_OLD_REG_MAC; + } if (err) return err; @@ -231,10 +242,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + (void) mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } else { + /* use old unregister mac format */ + set_param_l(&out_param, port); + (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } return; } __mlx4_unregister_mac(dev, port, mac); @@ -284,7 +303,7 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE); in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port; err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); @@ -370,9 +389,12 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) u64 out_param = 0; int err; + if (vlan > 4095) + return -EINVAL; + if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN, + err = mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) @@ -384,23 +406,26 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) } EXPORT_SYMBOL_GPL(mlx4_register_vlan); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int index; - if (index < MLX4_VLAN_REGULAR) { - mlx4_warn(dev, "Trying to free special vlan index %d\n", index); - return; + mutex_lock(&table->mutex); + if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { + mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); + goto out; } - mutex_lock(&table->mutex); - if (!table->refs[index]) { - mlx4_warn(dev, "No vlan entry for index %d\n", index); + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); goto out; } + if (--table->refs[index]) { - mlx4_dbg(dev, "Have more references for index %d," - "no need to modify vlan table\n", index); + mlx4_dbg(dev, "Have %d more references for index %d," + "no need to modify vlan table\n", table->refs[index], + index); goto out; } table->entries[index] = 0; @@ -410,23 +435,19 @@ out: mutex_unlock(&table->mutex); } -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { - u64 in_param = 0; - int err; + u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&in_param, port); - err = mlx4_cmd(dev, in_param, RES_VLAN, RES_OP_RESERVE_AND_MAP, - MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); - if (!err) - mlx4_warn(dev, "Failed freeing vlan at index:%d\n", - index); - + (void) mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, + RES_OP_RESERVE_AND_MAP, + MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); return; } - __mlx4_unregister_vlan(dev, port, index); + __mlx4_unregister_vlan(dev, port, vlan); } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index e891b058c1be..2715e61dbb74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -480,8 +480,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) */ err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), + (1 << 23) - 1, mlx4_num_reserved_sqps(dev), reserved_from_top); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index dd6876321116..b1603e2287a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -55,6 +55,14 @@ struct mac_res { u8 port; }; +struct vlan_res { + struct list_head list; + u16 vlan; + int ref_count; + int vlan_index; + u8 port; +}; + struct res_common { struct list_head list; struct rb_node node; @@ -266,6 +274,7 @@ static const char *ResourceType(enum mlx4_resource rt) case RES_MPT: return "RES_MPT"; case RES_MTT: return "RES_MTT"; case RES_MAC: return "RES_MAC"; + case RES_VLAN: return "RES_VLAN"; case RES_EQ: return "RES_EQ"; case RES_COUNTER: return "RES_COUNTER"; case RES_FS_RULE: return "RES_FS_RULE"; @@ -274,10 +283,139 @@ static const char *ResourceType(enum mlx4_resource rt) }; } +static void rem_slave_vlans(struct mlx4_dev *dev, int slave); +static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, + enum mlx4_resource res_type, int count, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[res_type]; + int err = -EINVAL; + int allocated, free, reserved, guaranteed, from_free; + + if (slave > dev->num_vfs) + return -EINVAL; + + spin_lock(&res_alloc->alloc_lock); + allocated = (port > 0) ? + res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[slave]; + free = (port > 0) ? res_alloc->res_port_free[port - 1] : + res_alloc->res_free; + reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] : + res_alloc->res_reserved; + guaranteed = res_alloc->guaranteed[slave]; + + if (allocated + count > res_alloc->quota[slave]) + goto out; + + if (allocated + count <= guaranteed) { + err = 0; + } else { + /* portion may need to be obtained from free area */ + if (guaranteed - allocated > 0) + from_free = count - (guaranteed - allocated); + else + from_free = count; + + if (free - from_free > reserved) + err = 0; + } + + if (!err) { + /* grant the request */ + if (port > 0) { + res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->res_port_free[port - 1] -= count; + } else { + res_alloc->allocated[slave] += count; + res_alloc->res_free -= count; + } + } + +out: + spin_unlock(&res_alloc->alloc_lock); + return err; +} + +static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, + enum mlx4_resource res_type, int count, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[res_type]; + + if (slave > dev->num_vfs) + return; + + spin_lock(&res_alloc->alloc_lock); + if (port > 0) { + res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->res_port_free[port - 1] += count; + } else { + res_alloc->allocated[slave] -= count; + res_alloc->res_free += count; + } + + spin_unlock(&res_alloc->alloc_lock); + return; +} + +static inline void initialize_res_quotas(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + enum mlx4_resource res_type, + int vf, int num_instances) +{ + res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; + if (vf == mlx4_master_func_num(dev)) { + res_alloc->res_free = num_instances; + if (res_type == RES_MTT) { + /* reserved mtts will be taken out of the PF allocation */ + res_alloc->res_free += dev->caps.reserved_mtts; + res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; + res_alloc->quota[vf] += dev->caps.reserved_mtts; + } + } +} + +void mlx4_init_quotas(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int pf; + + /* quotas for VFs are initialized in mlx4_slave_cap */ + if (mlx4_is_slave(dev)) + return; + + if (!mlx4_is_mfunc(dev)) { + dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev); + dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; + dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; + dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; + dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; + return; + } + + pf = mlx4_master_func_num(dev); + dev->quotas.qp = + priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; + dev->quotas.cq = + priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; + dev->quotas.srq = + priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; + dev->quotas.mtt = + priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; + dev->quotas.mpt = + priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; +} int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int i; + int i, j; int t; priv->mfunc.master.res_tracker.slave_list = @@ -298,8 +436,105 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[i]; + res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + if (i == RES_MAC || i == RES_VLAN) + res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * + (dev->num_vfs + 1) * sizeof(int), + GFP_KERNEL); + else + res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + + if (!res_alloc->quota || !res_alloc->guaranteed || + !res_alloc->allocated) + goto no_mem_err; + + spin_lock_init(&res_alloc->alloc_lock); + for (t = 0; t < dev->num_vfs + 1; t++) { + switch (i) { + case RES_QP: + initialize_res_quotas(dev, res_alloc, RES_QP, + t, dev->caps.num_qps - + dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev)); + break; + case RES_CQ: + initialize_res_quotas(dev, res_alloc, RES_CQ, + t, dev->caps.num_cqs - + dev->caps.reserved_cqs); + break; + case RES_SRQ: + initialize_res_quotas(dev, res_alloc, RES_SRQ, + t, dev->caps.num_srqs - + dev->caps.reserved_srqs); + break; + case RES_MPT: + initialize_res_quotas(dev, res_alloc, RES_MPT, + t, dev->caps.num_mpts - + dev->caps.reserved_mrws); + break; + case RES_MTT: + initialize_res_quotas(dev, res_alloc, RES_MTT, + t, dev->caps.num_mtts - + dev->caps.reserved_mtts); + break; + case RES_MAC: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; + } else { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + } + break; + case RES_VLAN: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; + res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = + res_alloc->quota[t]; + } else { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; + res_alloc->guaranteed[t] = 0; + } + break; + case RES_COUNTER: + res_alloc->quota[t] = dev->caps.max_counters; + res_alloc->guaranteed[t] = 0; + if (t == mlx4_master_func_num(dev)) + res_alloc->res_free = res_alloc->quota[t]; + break; + default: + break; + } + if (i == RES_MAC || i == RES_VLAN) { + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; + } else { + res_alloc->res_reserved += res_alloc->guaranteed[t]; + } + } + } spin_lock_init(&priv->mfunc.master.res_tracker.lock); - return 0 ; + return 0; + +no_mem_err: + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } + return -ENOMEM; } void mlx4_free_resource_tracker(struct mlx4_dev *dev, @@ -309,13 +544,28 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, int i; if (priv->mfunc.master.res_tracker.slave_list) { - if (type != RES_TR_FREE_STRUCTS_ONLY) - for (i = 0 ; i < dev->num_slaves; i++) + if (type != RES_TR_FREE_STRUCTS_ONLY) { + for (i = 0; i < dev->num_slaves; i++) { if (type == RES_TR_FREE_ALL || dev->caps.function != i) mlx4_delete_all_resources_for_slave(dev, i); + } + /* free master's vlans */ + i = dev->caps.function; + mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); + rem_slave_vlans(dev, i); + mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); + } if (type != RES_TR_FREE_SLAVES_ONLY) { + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } kfree(priv->mfunc.master.res_tracker.slave_list); priv->mfunc.master.res_tracker.slave_list = NULL; } @@ -1229,12 +1479,19 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, case RES_OP_RESERVE: count = get_param_l(&in_param); align = get_param_h(&in_param); - err = __mlx4_qp_reserve_range(dev, count, align, &base); + err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) return err; + err = __mlx4_qp_reserve_range(dev, count, align, &base); + if (err) { + mlx4_release_resource(dev, slave, RES_QP, count, 0); + return err; + } + err = add_res_range(dev, slave, base, count, RES_QP, 0); if (err) { + mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); return err; } @@ -1282,15 +1539,24 @@ static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; order = get_param_l(&in_param); + + err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0); + if (err) + return err; + base = __mlx4_alloc_mtt_range(dev, order); - if (base == -1) + if (base == -1) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); return -ENOMEM; + } err = add_res_range(dev, slave, base, 1, RES_MTT, order); - if (err) + if (err) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); - else + } else { set_param_l(out_param, base); + } return err; } @@ -1305,13 +1571,20 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: + err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0); + if (err) + break; + index = __mlx4_mpt_reserve(dev); - if (index == -1) + if (index == -1) { + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); break; + } id = index & mpt_mask(dev); err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mpt_release(dev, index); break; } @@ -1345,12 +1618,19 @@ static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE_AND_MAP: - err = __mlx4_cq_alloc_icm(dev, &cqn); + err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0); if (err) break; + err = __mlx4_cq_alloc_icm(dev, &cqn); + if (err) { + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); + break; + } + err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0); if (err) { + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; } @@ -1373,12 +1653,19 @@ static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE_AND_MAP: - err = __mlx4_srq_alloc_icm(dev, &srqn); + err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0); if (err) break; + err = __mlx4_srq_alloc_icm(dev, &srqn); + if (err) { + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); + break; + } + err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0); if (err) { + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; } @@ -1399,9 +1686,13 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port) struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct mac_res *res; + if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) + return -EINVAL; res = kzalloc(sizeof *res, GFP_KERNEL); - if (!res) + if (!res) { + mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; + } res->mac = mac; res->port = (u8) port; list_add_tail(&res->list, @@ -1421,6 +1712,7 @@ static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac, list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { list_del(&res->list); + mlx4_release_resource(dev, slave, RES_MAC, 1, port); kfree(res); break; } @@ -1438,12 +1730,13 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave) list_for_each_entry_safe(res, tmp, mac_list, list) { list_del(&res->list); __mlx4_unregister_mac(dev, res->port, res->mac); + mlx4_release_resource(dev, slave, RES_MAC, 1, res->port); kfree(res); } } static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int err = -EINVAL; int port; @@ -1452,7 +1745,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE_AND_MAP) return err; - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac = in_param; err = __mlx4_register_mac(dev, port, mac); @@ -1469,12 +1762,114 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; } -static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) +static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan, + int port, int vlan_index) { + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + if (res->vlan == vlan && res->port == (u8) port) { + /* vlan found. update ref count */ + ++res->ref_count; + return 0; + } + } + + if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port)) + return -EINVAL; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + mlx4_release_resource(dev, slave, RES_VLAN, 1, port); + return -ENOMEM; + } + res->vlan = vlan; + res->port = (u8) port; + res->vlan_index = vlan_index; + res->ref_count = 1; + list_add_tail(&res->list, + &tracker->slave_list[slave].res_list[RES_VLAN]); return 0; } + +static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + if (res->vlan == vlan && res->port == (u8) port) { + if (!--res->ref_count) { + list_del(&res->list); + mlx4_release_resource(dev, slave, RES_VLAN, + 1, port); + kfree(res); + } + break; + } + } +} + +static void rem_slave_vlans(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + int i; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + list_del(&res->list); + /* dereference the vlan the num times the slave referenced it */ + for (i = 0; i < res->ref_count; i++) + __mlx4_unregister_vlan(dev, res->port, res->vlan); + mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port); + kfree(res); + } +} + +static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int in_port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + int err; + u16 vlan; + int vlan_index; + int port; + + port = !in_port ? get_param_l(out_param) : in_port; + + if (!port || op != RES_OP_RESERVE_AND_MAP) + return -EINVAL; + + /* upstream kernels had NOP for reg/unreg vlan. Continue this. */ + if (!in_port && port > 0 && port <= dev->caps.num_ports) { + slave_state[slave].old_vlan_api = true; + return 0; + } + + vlan = (u16) in_param; + + err = __mlx4_register_vlan(dev, port, vlan, &vlan_index); + if (!err) { + set_param_l(out_param, (u32) vlan_index); + err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index); + if (err) + __mlx4_unregister_vlan(dev, port, vlan); + } + return err; +} + static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { @@ -1484,15 +1879,23 @@ static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE) return -EINVAL; - err = __mlx4_counter_alloc(dev, &index); + err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0); if (err) return err; + err = __mlx4_counter_alloc(dev, &index); + if (err) { + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + return err; + } + err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0); - if (err) + if (err) { __mlx4_counter_free(dev, index); - else + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + } else { set_param_l(out_param, index); + } return err; } @@ -1528,7 +1931,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, int err; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); @@ -1556,12 +1959,14 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: @@ -1597,6 +2002,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = rem_res_range(dev, slave, base, count, RES_QP, 0); if (err) break; + mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); break; case RES_OP_MAP_ICM: @@ -1634,8 +2040,10 @@ static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, base = get_param_l(&in_param); order = get_param_h(&in_param); err = rem_res_range(dev, slave, base, 1, RES_MTT, order); - if (!err) + if (!err) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); + } return err; } @@ -1660,6 +2068,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); if (err) break; + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: @@ -1694,6 +2103,7 @@ static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) break; + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; @@ -1718,6 +2128,7 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) break; + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; @@ -1730,14 +2141,14 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int port; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; @@ -1751,9 +2162,27 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { - return 0; + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + int err = 0; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + if (slave_state[slave].old_vlan_api) + return 0; + if (!port) + return -EINVAL; + vlan_del_from_slave(dev, slave, in_param, port); + __mlx4_unregister_vlan(dev, port, in_param); + break; + default: + err = -EINVAL; + break; + } + + return err; } static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, @@ -1771,6 +2200,7 @@ static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; __mlx4_counter_free(dev, index); + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); return err; } @@ -1803,7 +2233,7 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, int err = -EINVAL; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); @@ -1831,12 +2261,14 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: @@ -3498,6 +3930,11 @@ static void rem_slave_qps(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); + if (!valid_reserved(dev, slave, qpn)) { + __mlx4_qp_release_range(dev, qpn, 1); + mlx4_release_resource(dev, slave, + RES_QP, 1, 0); + } kfree(qp); state = 0; break; @@ -3569,6 +4006,8 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_SRQ, 1, 0); kfree(srq); state = 0; break; @@ -3635,6 +4074,8 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_CQ, 1, 0); kfree(cq); state = 0; break; @@ -3698,6 +4139,8 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_MPT, 1, 0); kfree(mpt); state = 0; break; @@ -3767,6 +4210,8 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, RES_MTT, + 1 << mtt->order, 0); kfree(mtt); state = 0; break; @@ -3925,6 +4370,7 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) list_del(&counter->com.list); kfree(counter); __mlx4_counter_free(dev, index); + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); } } spin_unlock_irq(mlx4_tlock(dev)); @@ -3964,7 +4410,7 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); - /*VLAN*/ + rem_slave_vlans(dev, slave); rem_slave_macs(dev, slave); rem_slave_fs_rule(dev, slave); rem_slave_qps(dev, slave); @@ -4081,7 +4527,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && NO_INDX != work->orig_vlan_ix) __mlx4_unregister_vlan(&work->priv->dev, work->port, - work->orig_vlan_ix); + work->orig_vlan_id); out: kfree(work); return; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 0c2405dbc970..631ea0ac1cd8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -38,8 +38,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 51 -#define QLCNIC_LINUX_VERSIONID "5.3.51" +#define _QLCNIC_LINUX_SUBVERSION 52 +#define QLCNIC_LINUX_VERSIONID "5.3.52" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) @@ -98,8 +98,22 @@ #define TX_STOP_THRESH ((MAX_SKB_FRAGS >> 2) + MAX_TSO_HEADER_DESC \ + MGMT_CMD_DESC_RESV) #define QLCNIC_MAX_TX_TIMEOUTS 2 -#define QLCNIC_MAX_TX_RINGS 8 -#define QLCNIC_MAX_SDS_RINGS 8 + +/* Driver will use 1 Tx ring in INT-x/MSI/SRIOV mode. */ +#define QLCNIC_SINGLE_RING 1 +#define QLCNIC_DEF_SDS_RINGS 4 +#define QLCNIC_DEF_TX_RINGS 4 +#define QLCNIC_MAX_VNIC_TX_RINGS 4 +#define QLCNIC_MAX_VNIC_SDS_RINGS 4 + +enum qlcnic_queue_type { + QLCNIC_TX_QUEUE = 1, + QLCNIC_RX_QUEUE, +}; + +/* Operational mode for driver */ +#define QLCNIC_VNIC_MODE 0xFF +#define QLCNIC_DEFAULT_MODE 0x0 /* * Following are the states of the Phantom. Phantom will set them and @@ -533,6 +547,14 @@ struct qlcnic_host_sds_ring { char name[IFNAMSIZ + 12]; } ____cacheline_internodealigned_in_smp; +struct qlcnic_tx_queue_stats { + u64 xmit_on; + u64 xmit_off; + u64 xmit_called; + u64 xmit_finished; + u64 tx_bytes; +}; + struct qlcnic_host_tx_ring { int irq; void __iomem *crb_intr_mask; @@ -544,10 +566,7 @@ struct qlcnic_host_tx_ring { u32 sw_consumer; u32 num_desc; - u64 xmit_on; - u64 xmit_off; - u64 xmit_called; - u64 xmit_finished; + struct qlcnic_tx_queue_stats tx_stats; void __iomem *crb_cmd_producer; struct cmd_desc_type0 *desc_head; @@ -940,8 +959,6 @@ struct qlcnic_ipaddr { #define QLCNIC_BEACON_EANBLE 0xC #define QLCNIC_BEACON_DISABLE 0xD -#define QLCNIC_DEF_NUM_STS_DESC_RINGS 4 -#define QLCNIC_DEF_NUM_TX_RINGS 4 #define QLCNIC_MSIX_TBL_SPACE 8192 #define QLCNIC_PCI_REG_MSIX_TBL 0x44 #define QLCNIC_MSIX_TBL_PGSIZE 4096 @@ -961,6 +978,7 @@ struct qlcnic_ipaddr { #define __QLCNIC_SRIOV_CAPABLE 11 #define __QLCNIC_MBX_POLL_ENABLE 12 #define __QLCNIC_DIAG_MODE 13 +#define __QLCNIC_MAINTENANCE_MODE 16 #define QLCNIC_INTERRUPT_TEST 1 #define QLCNIC_LOOPBACK_TEST 2 @@ -1011,7 +1029,6 @@ struct qlcnic_adapter { unsigned long state; u32 flags; - int max_drv_tx_rings; u16 num_txd; u16 num_rxd; u16 num_jumbo_rxd; @@ -1019,7 +1036,13 @@ struct qlcnic_adapter { u16 max_jumbo_rxd; u8 max_rds_rings; - u8 max_sds_rings; + + u8 max_sds_rings; /* max sds rings supported by adapter */ + u8 max_tx_rings; /* max tx rings supported by adapter */ + + u8 drv_tx_rings; /* max tx rings supported by driver */ + u8 drv_sds_rings; /* max sds rings supported by driver */ + u8 rx_csum; u8 portnum; @@ -1542,12 +1565,13 @@ int qlcnic_loopback_test(struct net_device *, u8); /* Functions from qlcnic_main.c */ int qlcnic_reset_context(struct qlcnic_adapter *); -void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings); -int qlcnic_diag_alloc_res(struct net_device *netdev, int test); -netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -int qlcnic_set_max_rss(struct qlcnic_adapter *, u8, int); -int qlcnic_validate_max_rss(struct qlcnic_adapter *, __u32); -int qlcnic_validate_max_tx_rings(struct qlcnic_adapter *, u32 txq); +void qlcnic_diag_free_res(struct net_device *netdev, int); +int qlcnic_diag_alloc_res(struct net_device *netdev, int); +netdev_tx_t qlcnic_xmit_frame(struct sk_buff *, struct net_device *); +void qlcnic_set_tx_ring_count(struct qlcnic_adapter *, u8); +void qlcnic_set_sds_ring_count(struct qlcnic_adapter *, u8); +int qlcnic_setup_rings(struct qlcnic_adapter *, u8, u8); +int qlcnic_validate_rings(struct qlcnic_adapter *, __u32, int); void qlcnic_alloc_lb_filters_mem(struct qlcnic_adapter *adapter); void qlcnic_82xx_set_mac_filter_count(struct qlcnic_adapter *); int qlcnic_enable_msix(struct qlcnic_adapter *, u32); @@ -1640,19 +1664,18 @@ static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring) static inline int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter, struct net_device *netdev) { - int err, tx_q; - - tx_q = adapter->max_drv_tx_rings; + int err; - netdev->num_tx_queues = tx_q; - netdev->real_num_tx_queues = tx_q; + netdev->num_tx_queues = adapter->drv_tx_rings; + netdev->real_num_tx_queues = adapter->drv_tx_rings; - err = netif_set_real_num_tx_queues(netdev, tx_q); + err = netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); if (err) dev_err(&adapter->pdev->dev, "failed to set %d Tx queues\n", - tx_q); + adapter->drv_tx_rings); else - dev_info(&adapter->pdev->dev, "set %d Tx queues\n", tx_q); + dev_info(&adapter->pdev->dev, "Set %d Tx queues\n", + adapter->drv_tx_rings); return err; } @@ -1694,7 +1717,7 @@ struct qlcnic_hardware_ops { int (*write_reg) (struct qlcnic_adapter *, ulong, u32); void (*get_ocm_win) (struct qlcnic_hardware_context *); int (*get_mac_address) (struct qlcnic_adapter *, u8 *, u8); - int (*setup_intr) (struct qlcnic_adapter *, u8, int); + int (*setup_intr) (struct qlcnic_adapter *); int (*alloc_mbx_args)(struct qlcnic_cmd_args *, struct qlcnic_adapter *, u32); int (*mbx_cmd) (struct qlcnic_adapter *, struct qlcnic_cmd_args *); @@ -1765,10 +1788,9 @@ static inline int qlcnic_get_mac_address(struct qlcnic_adapter *adapter, return adapter->ahw->hw_ops->get_mac_address(adapter, mac, function); } -static inline int qlcnic_setup_intr(struct qlcnic_adapter *adapter, - u8 num_intr, int txq) +static inline int qlcnic_setup_intr(struct qlcnic_adapter *adapter) { - return adapter->ahw->hw_ops->setup_intr(adapter, num_intr, txq); + return adapter->ahw->hw_ops->setup_intr(adapter); } static inline int qlcnic_alloc_mbx_args(struct qlcnic_cmd_args *mbx, @@ -2004,7 +2026,7 @@ static inline bool qlcnic_check_multi_tx(struct qlcnic_adapter *adapter) static inline void qlcnic_disable_multi_tx(struct qlcnic_adapter *adapter) { test_and_clear_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state); - adapter->max_drv_tx_rings = 1; + adapter->drv_tx_rings = QLCNIC_SINGLE_RING; } /* When operating in a muti tx mode, driver needs to write 0x1 diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index a126bdf27952..09810ddd11ec 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -13,7 +13,6 @@ #include <linux/interrupt.h> #include <linux/aer.h> -#define QLCNIC_MAX_TX_QUEUES 1 #define RSS_HASHTYPE_IP_TCP 0x3 #define QLC_83XX_FW_MBX_CMD 0 @@ -268,20 +267,18 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *adapter, ulong addr, } } -int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr, int txq) +int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter) { int err, i, num_msix; struct qlcnic_hardware_context *ahw = adapter->ahw; - if (!num_intr) - num_intr = QLCNIC_DEF_NUM_STS_DESC_RINGS; - num_msix = rounddown_pow_of_two(min_t(int, num_online_cpus(), - num_intr)); + num_msix = adapter->drv_sds_rings; + /* account for AEN interrupt MSI-X based interrupts */ num_msix += 1; if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) - num_msix += adapter->max_drv_tx_rings; + num_msix += adapter->drv_tx_rings; err = qlcnic_enable_msix(adapter, num_msix); if (err == -ENOMEM) @@ -325,7 +322,8 @@ inline void qlcnic_83xx_clear_legacy_intr_mask(struct qlcnic_adapter *adapter) inline void qlcnic_83xx_set_legacy_intr_mask(struct qlcnic_adapter *adapter) { - writel(1, adapter->tgt_mask_reg); + if (adapter->tgt_mask_reg) + writel(1, adapter->tgt_mask_reg); } /* Enable MSI-x and INT-x interrupts */ @@ -498,8 +496,11 @@ void qlcnic_83xx_free_mbx_intr(struct qlcnic_adapter *adapter) num_msix = 0; msleep(20); - synchronize_irq(adapter->msix_entries[num_msix].vector); - free_irq(adapter->msix_entries[num_msix].vector, adapter); + + if (adapter->msix_entries) { + synchronize_irq(adapter->msix_entries[num_msix].vector); + free_irq(adapter->msix_entries[num_msix].vector, adapter); + } } int qlcnic_83xx_setup_mbx_intr(struct qlcnic_adapter *adapter) @@ -760,6 +761,9 @@ int qlcnic_83xx_issue_cmd(struct qlcnic_adapter *adapter, int cmd_type, err, opcode; unsigned long timeout; + if (!mbx) + return -EIO; + opcode = LSW(cmd->req.arg[0]); cmd_type = cmd->type; err = mbx->ops->enqueue_cmd(adapter, cmd, &timeout); @@ -979,14 +983,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds_mbx_size = sizeof(struct qlcnic_sds_mbx); context_id = recv_ctx->context_id; - num_sds = (adapter->max_sds_rings - QLCNIC_MAX_RING_SETS); + num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS; ahw->hw_ops->alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_ADD_RCV_RINGS); cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ index = 2; - for (i = 8; i < adapter->max_sds_rings; i++) { + for (i = 8; i < adapter->drv_sds_rings; i++) { memset(&sds_mbx, 0, sds_mbx_size); sds = &recv_ctx->sds_rings[i]; sds->consumer = 0; @@ -1021,7 +1025,7 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) mbx_out = (struct qlcnic_add_rings_mbx_out *)&cmd.rsp.arg[1]; index = 0; /* status descriptor ring */ - for (i = 8; i < adapter->max_sds_rings; i++) { + for (i = 8; i < adapter->drv_sds_rings; i++) { sds = &recv_ctx->sds_rings[i]; sds->crb_sts_consumer = ahw->pci_base0 + mbx_out->host_csmr[index]; @@ -1079,10 +1083,10 @@ int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) struct qlcnic_hardware_context *ahw = adapter->ahw; num_rds = adapter->max_rds_rings; - if (adapter->max_sds_rings <= QLCNIC_MAX_RING_SETS) - num_sds = adapter->max_sds_rings; + if (adapter->drv_sds_rings <= QLCNIC_MAX_SDS_RINGS) + num_sds = adapter->drv_sds_rings; else - num_sds = QLCNIC_MAX_RING_SETS; + num_sds = QLCNIC_MAX_SDS_RINGS; sds_mbx_size = sizeof(struct qlcnic_sds_mbx); rds_mbx_size = sizeof(struct qlcnic_rds_mbx); @@ -1183,7 +1187,7 @@ int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) sds->crb_intr_mask = ahw->pci_base0 + intr_mask; } - if (adapter->max_sds_rings > QLCNIC_MAX_RING_SETS) + if (adapter->drv_sds_rings > QLCNIC_MAX_SDS_RINGS) err = qlcnic_83xx_add_rings(adapter); out: qlcnic_free_mbx_args(&cmd); @@ -1239,9 +1243,9 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, mbx.size = tx->num_desc; if (adapter->flags & QLCNIC_MSIX_ENABLED) { if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) - msix_vector = adapter->max_sds_rings + ring; + msix_vector = adapter->drv_sds_rings + ring; else - msix_vector = adapter->max_sds_rings - 1; + msix_vector = adapter->drv_sds_rings - 1; msix_id = ahw->intr_tbl[msix_vector].id; } else { msix_id = QLCRDX(ahw, QLCNIC_DEF_INT_ID); @@ -1264,7 +1268,8 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, qlcnic_pf_set_interface_id_create_tx_ctx(adapter, &temp); cmd.req.arg[1] = QLCNIC_CAP0_LEGACY_CONTEXT; - cmd.req.arg[5] = QLCNIC_MAX_TX_QUEUES | temp; + cmd.req.arg[5] = QLCNIC_SINGLE_RING | temp; + buf = &cmd.req.arg[6]; memcpy(buf, &mbx, sizeof(struct qlcnic_tx_mbx)); /* send the mailbox command*/ @@ -1279,7 +1284,7 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, tx->ctx_id = mbx_out->ctx_id; if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { - intr_mask = ahw->intr_tbl[adapter->max_sds_rings + ring].src; + intr_mask = ahw->intr_tbl[adapter->drv_sds_rings + ring].src; tx->crb_intr_mask = ahw->pci_base0 + intr_mask; } dev_info(&adapter->pdev->dev, "Tx Context[0x%x] Created, state:0x%x\n", @@ -1290,7 +1295,7 @@ out: } static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test, - int num_sds_ring) + u8 num_sds_ring) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_host_sds_ring *sds_ring; @@ -1306,7 +1311,7 @@ static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test, qlcnic_detach(adapter); - adapter->max_sds_rings = 1; + adapter->drv_sds_rings = QLCNIC_SINGLE_RING; adapter->ahw->diag_test = test; adapter->ahw->linkup = 0; @@ -1320,7 +1325,7 @@ static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test, if (ret) { qlcnic_detach(adapter); if (adapter_state == QLCNIC_ADAPTER_UP_MAGIC) { - adapter->max_sds_rings = num_sds_ring; + adapter->drv_sds_rings = num_sds_ring; qlcnic_attach(adapter); } netif_device_attach(netdev); @@ -1333,7 +1338,7 @@ static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test, } if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) { - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &adapter->recv_ctx->sds_rings[ring]; qlcnic_83xx_enable_intr(adapter, sds_ring); } @@ -1354,7 +1359,7 @@ static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test, } static void qlcnic_83xx_diag_free_res(struct net_device *netdev, - int max_sds_rings) + u8 drv_sds_rings) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_host_sds_ring *sds_ring; @@ -1362,7 +1367,7 @@ static void qlcnic_83xx_diag_free_res(struct net_device *netdev, clear_bit(__QLCNIC_DEV_UP, &adapter->state); if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) { - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &adapter->recv_ctx->sds_rings[ring]; qlcnic_83xx_disable_intr(adapter, sds_ring); if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) @@ -1386,7 +1391,7 @@ static void qlcnic_83xx_diag_free_res(struct net_device *netdev, } } adapter->ahw->diag_test = 0; - adapter->max_sds_rings = max_sds_rings; + adapter->drv_sds_rings = drv_sds_rings; if (qlcnic_attach(adapter)) goto out; @@ -1648,7 +1653,9 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_hardware_context *ahw = adapter->ahw; - int ret = 0, loop = 0, max_sds_rings = adapter->max_sds_rings; + u8 drv_sds_rings = adapter->drv_sds_rings; + u8 drv_tx_rings = adapter->drv_tx_rings; + int ret = 0, loop = 0; if (ahw->op_mode == QLCNIC_NON_PRIV_FUNC) { netdev_warn(netdev, @@ -1670,7 +1677,7 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) mode == QLCNIC_ILB_MODE ? "internal" : "external"); ret = qlcnic_83xx_diag_alloc_res(netdev, QLCNIC_LOOPBACK_TEST, - max_sds_rings); + drv_sds_rings); if (ret) goto fail_diag_alloc; @@ -1708,10 +1715,11 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) qlcnic_83xx_clear_lb_mode(adapter, mode); free_diag_res: - qlcnic_83xx_diag_free_res(netdev, max_sds_rings); + qlcnic_83xx_diag_free_res(netdev, drv_sds_rings); fail_diag_alloc: - adapter->max_sds_rings = max_sds_rings; + adapter->drv_sds_rings = drv_sds_rings; + adapter->drv_tx_rings = drv_tx_rings; qlcnic_release_diag_lock(adapter); return ret; } @@ -2276,9 +2284,9 @@ int qlcnic_83xx_get_nic_info(struct qlcnic_adapter *adapter, temp = (cmd.rsp.arg[8] & 0x7FFE0000) >> 17; npar_info->max_linkspeed_reg_offset = temp; } - if (npar_info->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) - memcpy(ahw->extra_capability, &cmd.rsp.arg[16], - sizeof(ahw->extra_capability)); + + memcpy(ahw->extra_capability, &cmd.rsp.arg[16], + sizeof(ahw->extra_capability)); out: qlcnic_free_mbx_args(&cmd); @@ -3049,11 +3057,14 @@ int qlcnic_83xx_get_settings(struct qlcnic_adapter *adapter, int status = 0; struct qlcnic_hardware_context *ahw = adapter->ahw; - /* Get port configuration info */ - status = qlcnic_83xx_get_port_info(adapter); - /* Get Link Status related info */ - config = qlcnic_83xx_test_link(adapter); - ahw->module_type = QLC_83XX_SFP_MODULE_TYPE(config); + if (!test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) { + /* Get port configuration info */ + status = qlcnic_83xx_get_port_info(adapter); + /* Get Link Status related info */ + config = qlcnic_83xx_test_link(adapter); + ahw->module_type = QLC_83XX_SFP_MODULE_TYPE(config); + } + /* hard code until there is a way to get it from flash */ ahw->board_type = QLCNIC_BRDTYPE_83XX_10G; @@ -3293,10 +3304,11 @@ int qlcnic_83xx_interrupt_test(struct net_device *netdev) struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_hardware_context *ahw = adapter->ahw; struct qlcnic_cmd_args cmd; + u8 val, drv_sds_rings = adapter->drv_sds_rings; + u8 drv_tx_rings = adapter->drv_tx_rings; u32 data; u16 intrpt_id, id; - u8 val; - int ret, max_sds_rings = adapter->max_sds_rings; + int ret; if (test_bit(__QLCNIC_RESETTING, &adapter->state)) { netdev_info(netdev, "Device is resetting\n"); @@ -3309,7 +3321,7 @@ int qlcnic_83xx_interrupt_test(struct net_device *netdev) } ret = qlcnic_83xx_diag_alloc_res(netdev, QLCNIC_INTERRUPT_TEST, - max_sds_rings); + drv_sds_rings); if (ret) goto fail_diag_irq; @@ -3346,10 +3358,11 @@ int qlcnic_83xx_interrupt_test(struct net_device *netdev) done: qlcnic_free_mbx_args(&cmd); - qlcnic_83xx_diag_free_res(netdev, max_sds_rings); + qlcnic_83xx_diag_free_res(netdev, drv_sds_rings); fail_diag_irq: - adapter->max_sds_rings = max_sds_rings; + adapter->drv_sds_rings = drv_sds_rings; + adapter->drv_tx_rings = drv_tx_rings; qlcnic_release_diag_lock(adapter); return ret; } @@ -3503,7 +3516,7 @@ int qlcnic_83xx_resume(struct qlcnic_adapter *adapter) if (err) return err; - if (ahw->nic_mode == QLC_83XX_VIRTUAL_NIC_MODE) { + if (ahw->nic_mode == QLCNIC_VNIC_MODE) { if (ahw->op_mode == QLCNIC_MGMT_FUNC) { qlcnic_83xx_set_vnic_opmode(adapter); } else { @@ -3530,6 +3543,9 @@ void qlcnic_83xx_reinit_mbx_work(struct qlcnic_mailbox *mbx) void qlcnic_83xx_free_mailbox(struct qlcnic_mailbox *mbx) { + if (!mbx) + return; + destroy_workqueue(mbx->work_q); kfree(mbx); } @@ -3650,6 +3666,9 @@ void qlcnic_83xx_detach_mailbox_work(struct qlcnic_adapter *adapter) { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; + if (!mbx) + return; + clear_bit(QLC_83XX_MBX_READY, &mbx->status); complete(&mbx->completion); cancel_work_sync(&mbx->work); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 9f4e4c4ab521..4cae6caa6bfa 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -61,7 +61,6 @@ #define QLC_83XX_HOST_SDS_MBX_IDX 8 #define QLCNIC_HOST_RDS_MBX_IDX 88 -#define QLCNIC_MAX_RING_SETS 8 /* Pause control registers */ #define QLC_83XX_SRE_SHIM_REG 0x0D200284 @@ -183,8 +182,8 @@ struct qlcnic_rcv_mbx_out { u8 num_pci_func; u8 state; #endif - u32 host_csmr[QLCNIC_MAX_RING_SETS]; - struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; + u32 host_csmr[QLCNIC_MAX_SDS_RINGS]; + struct __host_producer_mbx host_prod[QLCNIC_MAX_SDS_RINGS]; } __packed; struct qlcnic_add_rings_mbx_out { @@ -197,8 +196,8 @@ struct qlcnic_add_rings_mbx_out { u8 sts_num; u8 rcv_num; #endif - u32 host_csmr[QLCNIC_MAX_RING_SETS]; - struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; + u32 host_csmr[QLCNIC_MAX_SDS_RINGS]; + struct __host_producer_mbx host_prod[QLCNIC_MAX_SDS_RINGS]; } __packed; /* Transmit context mailbox inbox registers @@ -415,8 +414,6 @@ enum qlcnic_83xx_states { #define QLC_83XX_GET_VLAN_ALIGN_CAPABILITY(val) (val & 0x4000) #define QLC_83XX_GET_FW_LRO_MSS_CAPABILITY(val) (val & 0x20000) #define QLC_83XX_ESWITCH_CAPABILITY BIT_23 -#define QLC_83XX_VIRTUAL_NIC_MODE 0xFF -#define QLC_83XX_DEFAULT_MODE 0x0 #define QLC_83XX_SRIOV_MODE 0x1 #define QLCNIC_BRDTYPE_83XX_10G 0x0083 @@ -524,7 +521,7 @@ enum qlc_83xx_ext_regs { /* 83xx funcitons */ int qlcnic_83xx_get_fw_version(struct qlcnic_adapter *); int qlcnic_83xx_issue_cmd(struct qlcnic_adapter *, struct qlcnic_cmd_args *); -int qlcnic_83xx_setup_intr(struct qlcnic_adapter *, u8, int); +int qlcnic_83xx_setup_intr(struct qlcnic_adapter *); void qlcnic_83xx_get_func_no(struct qlcnic_adapter *); int qlcnic_83xx_cam_lock(struct qlcnic_adapter *); void qlcnic_83xx_cam_unlock(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index e2cd48417041..89208e5b25d6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -902,7 +902,7 @@ static int qlcnic_83xx_idc_need_reset_state(struct qlcnic_adapter *adapter) qlcnic_83xx_idc_update_audit_reg(adapter, 0, 1); set_bit(__QLCNIC_RESETTING, &adapter->state); clear_bit(QLC_83XX_MBX_READY, &mbx->status); - if (adapter->ahw->nic_mode == QLC_83XX_VIRTUAL_NIC_MODE) + if (adapter->ahw->nic_mode == QLCNIC_VNIC_MODE) qlcnic_83xx_disable_vnic_mode(adapter, 1); if (qlcnic_check_diag_status(adapter)) { @@ -2033,6 +2033,8 @@ int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) ahw->max_mac_filters = nic_info.max_mac_filters; ahw->max_mtu = nic_info.max_mtu; + adapter->max_tx_rings = ahw->max_tx_ques; + adapter->max_sds_rings = ahw->max_rx_ques; /* eSwitch capability indicates vNIC mode. * vNIC and SRIOV are mutually exclusive operational modes. * If SR-IOV capability is detected, SR-IOV physical function @@ -2045,7 +2047,7 @@ int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) return QLC_83XX_DEFAULT_OPMODE; if (ahw->capabilities & QLC_83XX_ESWITCH_CAPABILITY) - return QLC_83XX_VIRTUAL_NIC_MODE; + return QLCNIC_VNIC_MODE; return QLC_83XX_DEFAULT_OPMODE; } @@ -2059,15 +2061,20 @@ int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) if (ret == -EIO) return -EIO; - if (ret == QLC_83XX_VIRTUAL_NIC_MODE) { - ahw->nic_mode = QLC_83XX_VIRTUAL_NIC_MODE; + if (ret == QLCNIC_VNIC_MODE) { + ahw->nic_mode = QLCNIC_VNIC_MODE; + if (qlcnic_83xx_config_vnic_opmode(adapter)) return -EIO; + adapter->max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS; + adapter->max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS; } else if (ret == QLC_83XX_DEFAULT_OPMODE) { - ahw->nic_mode = QLC_83XX_DEFAULT_MODE; + ahw->nic_mode = QLCNIC_DEFAULT_MODE; adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver; ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry; + adapter->max_sds_rings = ahw->max_rx_ques; + adapter->max_tx_rings = ahw->max_tx_ques; } else { return -EIO; } @@ -2170,6 +2177,23 @@ static int qlcnic_83xx_get_fw_info(struct qlcnic_adapter *adapter) return err; } +static void qlcnic_83xx_init_rings(struct qlcnic_adapter *adapter) +{ + u8 rx_cnt = QLCNIC_DEF_SDS_RINGS; + u8 tx_cnt = QLCNIC_DEF_TX_RINGS; + + adapter->max_tx_rings = QLCNIC_MAX_TX_RINGS; + adapter->max_sds_rings = QLCNIC_MAX_SDS_RINGS; + + if (!adapter->ahw->msix_supported) { + rx_cnt = QLCNIC_SINGLE_RING; + tx_cnt = QLCNIC_SINGLE_RING; + } + + /* compute and set drv sds rings */ + qlcnic_set_tx_ring_count(adapter, tx_cnt); + qlcnic_set_sds_ring_count(adapter, rx_cnt); +} int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) { @@ -2178,6 +2202,9 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) int err = 0; ahw->msix_supported = !!qlcnic_use_msi_x; + + qlcnic_83xx_init_rings(adapter); + err = qlcnic_83xx_init_mailbox_work(adapter); if (err) goto exit; @@ -2190,22 +2217,26 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) return err; } + if (qlcnic_83xx_read_flash_descriptor_table(adapter) || + qlcnic_83xx_read_flash_mfg_id(adapter)) { + dev_err(&adapter->pdev->dev, "Failed reading flash mfg id\n"); + err = -ENOTRECOVERABLE; + goto detach_mbx; + } + err = qlcnic_83xx_check_hw_status(adapter); if (err) goto detach_mbx; - if (!qlcnic_83xx_read_flash_descriptor_table(adapter)) - qlcnic_83xx_read_flash_mfg_id(adapter); - err = qlcnic_83xx_get_fw_info(adapter); if (err) goto detach_mbx; err = qlcnic_83xx_idc_init(adapter); if (err) - goto clear_fw_info; + goto detach_mbx; - err = qlcnic_setup_intr(adapter, 0, 0); + err = qlcnic_setup_intr(adapter); if (err) { dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n"); goto disable_intr; @@ -2227,6 +2258,7 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) if (err) goto disable_mbx_intr; + /* Perform operating mode specific initialization */ err = adapter->nic_ops->init_driver(adapter); if (err) @@ -2247,12 +2279,10 @@ disable_mbx_intr: disable_intr: qlcnic_teardown_intr(adapter); -clear_fw_info: - kfree(ahw->fw_info); - detach_mbx: qlcnic_83xx_detach_mailbox_work(adapter); qlcnic_83xx_free_mailbox(ahw->mailbox); + ahw->mailbox = NULL; exit: return err; } @@ -2265,7 +2295,7 @@ void qlcnic_83xx_aer_stop_poll_work(struct qlcnic_adapter *adapter) clear_bit(QLC_83XX_MBX_READY, &idc->status); cancel_delayed_work_sync(&adapter->fw_work); - if (ahw->nic_mode == QLC_83XX_VIRTUAL_NIC_MODE) + if (ahw->nic_mode == QLCNIC_VNIC_MODE) qlcnic_83xx_disable_vnic_mode(adapter, 1); qlcnic_83xx_idc_detach_driver(adapter); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 86850dd633a1..859cb161fc63 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -270,7 +270,7 @@ int qlcnic_82xx_fw_cmd_create_rx_ctx(struct qlcnic_adapter *adapter) int err; nrds_rings = adapter->max_rds_rings; - nsds_rings = adapter->max_sds_rings; + nsds_rings = adapter->drv_sds_rings; rq_size = SIZEOF_HOSTRQ_RX(struct qlcnic_hostrq_rx_ctx, nrds_rings, nsds_rings); @@ -475,7 +475,7 @@ int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter, if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { - temp_nsds_rings = adapter->max_sds_rings; + temp_nsds_rings = adapter->drv_sds_rings; index = temp_nsds_rings + ring; msix_id = ahw->intr_tbl[index].id; prq->msi_index = cpu_to_le16(msix_id); @@ -512,7 +512,7 @@ int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter, if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test && (adapter->flags & QLCNIC_MSIX_ENABLED)) { - index = adapter->max_sds_rings + ring; + index = adapter->drv_sds_rings + ring; intr_mask = ahw->intr_tbl[index].src; tx_ring->crb_intr_mask = ahw->pci_base0 + intr_mask; } @@ -582,7 +582,7 @@ int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter) recv_ctx = adapter->recv_ctx; - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; ptr = (__le32 *)dma_alloc_coherent(&pdev->dev, sizeof(u32), &tx_ring->hw_cons_phys_addr, @@ -616,7 +616,7 @@ int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter) } - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; addr = dma_alloc_coherent(&adapter->pdev->dev, @@ -664,7 +664,7 @@ int qlcnic_fw_create_ctx(struct qlcnic_adapter *dev) if (err) goto err_out; - for (ring = 0; ring < dev->max_drv_tx_rings; ring++) { + for (ring = 0; ring < dev->drv_tx_rings; ring++) { err = qlcnic_fw_cmd_create_tx_ctx(dev, &dev->tx_ring[ring], ring); @@ -703,7 +703,7 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) if (test_and_clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state)) { qlcnic_fw_cmd_del_rx_ctx(adapter); - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) + for (ring = 0; ring < adapter->drv_tx_rings; ring++) qlcnic_fw_cmd_del_tx_ctx(adapter, &adapter->tx_ring[ring]); @@ -733,7 +733,7 @@ void qlcnic_free_hw_resources(struct qlcnic_adapter *adapter) recv_ctx = adapter->recv_ctx; - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; if (tx_ring->hw_consumer != NULL) { dma_free_coherent(&adapter->pdev->dev, sizeof(u32), @@ -764,7 +764,7 @@ void qlcnic_free_hw_resources(struct qlcnic_adapter *adapter) } } - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; if (sds_ring->desc_head != NULL) { @@ -895,6 +895,8 @@ int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *adapter, npar_info->max_rx_ques = le16_to_cpu(nic_info->max_rx_ques); npar_info->capabilities = le32_to_cpu(nic_info->capabilities); npar_info->max_mtu = le16_to_cpu(nic_info->max_mtu); + adapter->max_tx_rings = npar_info->max_tx_ques; + adapter->max_sds_rings = npar_info->max_rx_ques; } qlcnic_free_mbx_args(&cmd); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index b2a8805997ca..b36c02fafcfd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -27,43 +27,36 @@ static const u32 qlcnic_fw_dump_level[] = { }; static const struct qlcnic_stats qlcnic_gstrings_stats[] = { + {"xmit_on", QLC_SIZEOF(stats.xmit_on), QLC_OFF(stats.xmit_on)}, + {"xmit_off", QLC_SIZEOF(stats.xmit_off), QLC_OFF(stats.xmit_off)}, {"xmit_called", QLC_SIZEOF(stats.xmitcalled), - QLC_OFF(stats.xmitcalled)}, + QLC_OFF(stats.xmitcalled)}, {"xmit_finished", QLC_SIZEOF(stats.xmitfinished), - QLC_OFF(stats.xmitfinished)}, - {"rx_dropped", QLC_SIZEOF(stats.rxdropped), QLC_OFF(stats.rxdropped)}, + QLC_OFF(stats.xmitfinished)}, + {"tx dma map error", QLC_SIZEOF(stats.tx_dma_map_error), + QLC_OFF(stats.tx_dma_map_error)}, + {"tx_bytes", QLC_SIZEOF(stats.txbytes), QLC_OFF(stats.txbytes)}, {"tx_dropped", QLC_SIZEOF(stats.txdropped), QLC_OFF(stats.txdropped)}, - {"csummed", QLC_SIZEOF(stats.csummed), QLC_OFF(stats.csummed)}, + {"rx dma map error", QLC_SIZEOF(stats.rx_dma_map_error), + QLC_OFF(stats.rx_dma_map_error)}, {"rx_pkts", QLC_SIZEOF(stats.rx_pkts), QLC_OFF(stats.rx_pkts)}, - {"lro_pkts", QLC_SIZEOF(stats.lro_pkts), QLC_OFF(stats.lro_pkts)}, {"rx_bytes", QLC_SIZEOF(stats.rxbytes), QLC_OFF(stats.rxbytes)}, - {"tx_bytes", QLC_SIZEOF(stats.txbytes), QLC_OFF(stats.txbytes)}, + {"rx_dropped", QLC_SIZEOF(stats.rxdropped), QLC_OFF(stats.rxdropped)}, + {"null rxbuf", QLC_SIZEOF(stats.null_rxbuf), QLC_OFF(stats.null_rxbuf)}, + {"csummed", QLC_SIZEOF(stats.csummed), QLC_OFF(stats.csummed)}, + {"lro_pkts", QLC_SIZEOF(stats.lro_pkts), QLC_OFF(stats.lro_pkts)}, {"lrobytes", QLC_SIZEOF(stats.lrobytes), QLC_OFF(stats.lrobytes)}, {"lso_frames", QLC_SIZEOF(stats.lso_frames), QLC_OFF(stats.lso_frames)}, - {"xmit_on", QLC_SIZEOF(stats.xmit_on), QLC_OFF(stats.xmit_on)}, - {"xmit_off", QLC_SIZEOF(stats.xmit_off), QLC_OFF(stats.xmit_off)}, {"skb_alloc_failure", QLC_SIZEOF(stats.skb_alloc_failure), QLC_OFF(stats.skb_alloc_failure)}, - {"null rxbuf", QLC_SIZEOF(stats.null_rxbuf), QLC_OFF(stats.null_rxbuf)}, - {"rx dma map error", QLC_SIZEOF(stats.rx_dma_map_error), - QLC_OFF(stats.rx_dma_map_error)}, - {"tx dma map error", QLC_SIZEOF(stats.tx_dma_map_error), - QLC_OFF(stats.tx_dma_map_error)}, {"mac_filter_limit_overrun", QLC_SIZEOF(stats.mac_filter_limit_overrun), - QLC_OFF(stats.mac_filter_limit_overrun)}, + QLC_OFF(stats.mac_filter_limit_overrun)}, {"spurious intr", QLC_SIZEOF(stats.spurious_intr), QLC_OFF(stats.spurious_intr)}, }; static const char qlcnic_device_gstrings_stats[][ETH_GSTRING_LEN] = { - "rx unicast frames", - "rx multicast frames", - "rx broadcast frames", - "rx dropped frames", - "rx errors", - "rx local frames", - "rx numbytes", "tx unicast frames", "tx multicast frames", "tx broadcast frames", @@ -71,6 +64,13 @@ static const char qlcnic_device_gstrings_stats[][ETH_GSTRING_LEN] = { "tx errors", "tx local frames", "tx numbytes", + "rx unicast frames", + "rx multicast frames", + "rx broadcast frames", + "rx dropped frames", + "rx errors", + "rx local frames", + "rx numbytes", }; static const char qlcnic_83xx_tx_stats_strings[][ETH_GSTRING_LEN] = { @@ -126,13 +126,16 @@ static const char qlcnic_83xx_mac_stats_strings[][ETH_GSTRING_LEN] = { #define QLCNIC_STATS_LEN ARRAY_SIZE(qlcnic_gstrings_stats) -static const char qlcnic_tx_ring_stats_strings[][ETH_GSTRING_LEN] = { +static const char qlcnic_tx_queue_stats_strings[][ETH_GSTRING_LEN] = { "xmit_on", "xmit_off", "xmit_called", "xmit_finished", + "tx_bytes", }; +#define QLCNIC_TX_STATS_LEN ARRAY_SIZE(qlcnic_tx_queue_stats_strings) + static const char qlcnic_83xx_rx_stats_strings[][ETH_GSTRING_LEN] = { "ctx_rx_bytes", "ctx_rx_pkts", @@ -223,9 +226,9 @@ static const u32 ext_diag_registers[] = { static inline int qlcnic_get_ring_regs_len(struct qlcnic_adapter *adapter) { - int ring_regs_cnt = (adapter->max_drv_tx_rings * 5) + + int ring_regs_cnt = (adapter->drv_tx_rings * 5) + (adapter->max_rds_rings * 2) + - (adapter->max_sds_rings * 3) + 5; + (adapter->drv_sds_rings * 3) + 5; return ring_regs_cnt * sizeof(u32); } @@ -527,8 +530,8 @@ qlcnic_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) /* Marker btw regs and TX ring count */ regs_buff[i++] = 0xFFEFCDAB; - regs_buff[i++] = adapter->max_drv_tx_rings; /* No. of TX ring */ - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + regs_buff[i++] = adapter->drv_tx_rings; /* No. of TX ring */ + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; regs_buff[i++] = le32_to_cpu(*(tx_ring->hw_consumer)); regs_buff[i++] = tx_ring->sw_consumer; @@ -547,8 +550,8 @@ qlcnic_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) regs_buff[i++] = rds_rings->producer; } - regs_buff[i++] = adapter->max_sds_rings; /* No. of SDS ring */ - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + regs_buff[i++] = adapter->drv_sds_rings; /* No. of SDS ring */ + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &(recv_ctx->sds_rings[ring]); regs_buff[i++] = readl(sds_ring->crb_sts_consumer); regs_buff[i++] = sds_ring->consumer; @@ -661,46 +664,88 @@ qlcnic_set_ringparam(struct net_device *dev, return qlcnic_reset_context(adapter); } +static int qlcnic_validate_ring_count(struct qlcnic_adapter *adapter, + u8 rx_ring, u8 tx_ring) +{ + if (rx_ring != 0) { + if (rx_ring > adapter->max_sds_rings) { + netdev_err(adapter->netdev, "Invalid ring count, SDS ring count %d should not be greater than max %d driver sds rings.\n", + rx_ring, adapter->max_sds_rings); + return -EINVAL; + } + } + + if (tx_ring != 0) { + if (qlcnic_82xx_check(adapter) && + (tx_ring > adapter->max_tx_rings)) { + netdev_err(adapter->netdev, + "Invalid ring count, Tx ring count %d should not be greater than max %d driver Tx rings.\n", + tx_ring, adapter->max_tx_rings); + return -EINVAL; + } + + if (qlcnic_83xx_check(adapter) && + (tx_ring > QLCNIC_SINGLE_RING)) { + netdev_err(adapter->netdev, + "Invalid ring count, Tx ring count %d should not be greater than %d driver Tx rings.\n", + tx_ring, QLCNIC_SINGLE_RING); + return -EINVAL; + } + } + + return 0; +} + static void qlcnic_get_channels(struct net_device *dev, struct ethtool_channels *channel) { struct qlcnic_adapter *adapter = netdev_priv(dev); - int min; - - min = min_t(int, adapter->ahw->max_rx_ques, num_online_cpus()); - channel->max_rx = rounddown_pow_of_two(min); - channel->max_tx = min_t(int, QLCNIC_MAX_TX_RINGS, num_online_cpus()); - channel->rx_count = adapter->max_sds_rings; - channel->tx_count = adapter->max_drv_tx_rings; + channel->max_rx = adapter->max_sds_rings; + channel->max_tx = adapter->max_tx_rings; + channel->rx_count = adapter->drv_sds_rings; + channel->tx_count = adapter->drv_tx_rings; } static int qlcnic_set_channels(struct net_device *dev, - struct ethtool_channels *channel) + struct ethtool_channels *channel) { struct qlcnic_adapter *adapter = netdev_priv(dev); int err; - int txq = 0; if (channel->other_count || channel->combined_count) return -EINVAL; + err = qlcnic_validate_ring_count(adapter, channel->rx_count, + channel->tx_count); + if (err) + return err; + if (channel->rx_count) { - err = qlcnic_validate_max_rss(adapter, channel->rx_count); - if (err) + err = qlcnic_validate_rings(adapter, channel->rx_count, + QLCNIC_RX_QUEUE); + if (err) { + netdev_err(dev, "Unable to configure %u SDS rings\n", + channel->rx_count); return err; + } } - if (qlcnic_82xx_check(adapter) && channel->tx_count) { - err = qlcnic_validate_max_tx_rings(adapter, channel->tx_count); - if (err) + if (channel->tx_count) { + err = qlcnic_validate_rings(adapter, channel->tx_count, + QLCNIC_TX_QUEUE); + if (err) { + netdev_err(dev, "Unable to configure %u Tx rings\n", + channel->tx_count); return err; - txq = channel->tx_count; + } } - err = qlcnic_set_max_rss(adapter, channel->rx_count, txq); - netdev_info(dev, "allocated 0x%x sds rings and 0x%x tx rings\n", - adapter->max_sds_rings, adapter->max_drv_tx_rings); + err = qlcnic_setup_rings(adapter, channel->rx_count, + channel->tx_count); + netdev_info(dev, "Allocated %d SDS rings and %d Tx rings\n", + adapter->drv_sds_rings, adapter->drv_tx_rings); + return err; } @@ -902,7 +947,7 @@ static int qlcnic_irq_test(struct net_device *netdev) struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_hardware_context *ahw = adapter->ahw; struct qlcnic_cmd_args cmd; - int ret, max_sds_rings = adapter->max_sds_rings; + int ret, drv_sds_rings = adapter->drv_sds_rings; if (qlcnic_83xx_check(adapter)) return qlcnic_83xx_interrupt_test(netdev); @@ -931,10 +976,10 @@ done: qlcnic_free_mbx_args(&cmd); free_diag_res: - qlcnic_diag_free_res(netdev, max_sds_rings); + qlcnic_diag_free_res(netdev, drv_sds_rings); clear_diag_irq: - adapter->max_sds_rings = max_sds_rings; + adapter->drv_sds_rings = drv_sds_rings; clear_bit(__QLCNIC_RESETTING, &adapter->state); return ret; @@ -1010,8 +1055,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) int qlcnic_loopback_test(struct net_device *netdev, u8 mode) { struct qlcnic_adapter *adapter = netdev_priv(netdev); - int max_drv_tx_rings = adapter->max_drv_tx_rings; - int max_sds_rings = adapter->max_sds_rings; + int drv_tx_rings = adapter->drv_tx_rings; + int drv_sds_rings = adapter->drv_sds_rings; struct qlcnic_host_sds_ring *sds_ring; struct qlcnic_hardware_context *ahw = adapter->ahw; int loop = 0; @@ -1066,11 +1111,11 @@ int qlcnic_loopback_test(struct net_device *netdev, u8 mode) qlcnic_clear_lb_mode(adapter, mode); free_res: - qlcnic_diag_free_res(netdev, max_sds_rings); + qlcnic_diag_free_res(netdev, drv_sds_rings); clear_it: - adapter->max_sds_rings = max_sds_rings; - adapter->max_drv_tx_rings = max_drv_tx_rings; + adapter->drv_sds_rings = drv_sds_rings; + adapter->drv_tx_rings = drv_tx_rings; clear_bit(__QLCNIC_RESETTING, &adapter->state); return ret; } @@ -1123,11 +1168,11 @@ qlcnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) QLCNIC_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_STATS: - num_stats = ARRAY_SIZE(qlcnic_tx_ring_stats_strings); - for (i = 0; i < adapter->max_drv_tx_rings; i++) { + num_stats = ARRAY_SIZE(qlcnic_tx_queue_stats_strings); + for (i = 0; i < adapter->drv_tx_rings; i++) { for (index = 0; index < num_stats; index++) { - sprintf(data, "tx_ring_%d %s", i, - qlcnic_tx_ring_stats_strings[index]); + sprintf(data, "tx_queue_%d %s", i, + qlcnic_tx_queue_stats_strings[index]); data += ETH_GSTRING_LEN; } } @@ -1225,6 +1270,36 @@ static u64 *qlcnic_fill_stats(u64 *data, void *stats, int type) return data; } +static void qlcnic_update_stats(struct qlcnic_adapter *adapter) +{ + struct qlcnic_host_tx_ring *tx_ring; + int ring; + + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { + tx_ring = &adapter->tx_ring[ring]; + adapter->stats.xmit_on += tx_ring->tx_stats.xmit_on; + adapter->stats.xmit_off += tx_ring->tx_stats.xmit_off; + adapter->stats.xmitcalled += tx_ring->tx_stats.xmit_called; + adapter->stats.xmitfinished += tx_ring->tx_stats.xmit_finished; + adapter->stats.txbytes += tx_ring->tx_stats.tx_bytes; + } +} + +static u64 *qlcnic_fill_tx_queue_stats(u64 *data, void *stats) +{ + struct qlcnic_host_tx_ring *tx_ring; + + tx_ring = (struct qlcnic_host_tx_ring *)stats; + + *data++ = QLCNIC_FILL_STATS(tx_ring->tx_stats.xmit_on); + *data++ = QLCNIC_FILL_STATS(tx_ring->tx_stats.xmit_off); + *data++ = QLCNIC_FILL_STATS(tx_ring->tx_stats.xmit_called); + *data++ = QLCNIC_FILL_STATS(tx_ring->tx_stats.xmit_finished); + *data++ = QLCNIC_FILL_STATS(tx_ring->tx_stats.tx_bytes); + + return data; +} + static void qlcnic_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -1232,19 +1307,20 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_esw_statistics port_stats; struct qlcnic_mac_statistics mac_stats; - int index, ret, length, size, ring; + int index, ret, length, size, tx_size, ring; char *p; - memset(data, 0, adapter->max_drv_tx_rings * 4 * sizeof(u64)); - for (ring = 0, index = 0; ring < adapter->max_drv_tx_rings; ring++) { + tx_size = adapter->drv_tx_rings * QLCNIC_TX_STATS_LEN; + + memset(data, 0, tx_size * sizeof(u64)); + for (ring = 0, index = 0; ring < adapter->drv_tx_rings; ring++) { if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) { tx_ring = &adapter->tx_ring[ring]; - *data++ = tx_ring->xmit_on; - *data++ = tx_ring->xmit_off; - *data++ = tx_ring->xmit_called; - *data++ = tx_ring->xmit_finished; + data = qlcnic_fill_tx_queue_stats(data, tx_ring); + qlcnic_update_stats(adapter); } } + memset(data, 0, stats->n_stats * sizeof(u64)); length = QLCNIC_STATS_LEN; for (index = 0; index < length; index++) { @@ -1286,7 +1362,7 @@ static int qlcnic_set_led(struct net_device *dev, enum ethtool_phys_id_state state) { struct qlcnic_adapter *adapter = netdev_priv(dev); - int max_sds_rings = adapter->max_sds_rings; + int drv_sds_rings = adapter->drv_sds_rings; int err = -EIO, active = 1; if (qlcnic_83xx_check(adapter)) @@ -1344,7 +1420,7 @@ static int qlcnic_set_led(struct net_device *dev, } if (test_and_clear_bit(__QLCNIC_DIAG_RES_ALLOC, &adapter->state)) - qlcnic_diag_free_res(dev, max_sds_rings); + qlcnic_diag_free_res(dev, drv_sds_rings); if (!active || err) clear_bit(__QLCNIC_LED_ENABLE, &adapter->state); @@ -1685,7 +1761,6 @@ qlcnic_set_dump(struct net_device *netdev, struct ethtool_dump *val) struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; bool valid_mask = false; int i, ret = 0; - u32 state; switch (val->flag) { case QLCNIC_FORCE_FW_DUMP_KEY: @@ -1738,9 +1813,8 @@ qlcnic_set_dump(struct net_device *netdev, struct ethtool_dump *val) case QLCNIC_SET_QUIESCENT: case QLCNIC_RESET_QUIESCENT: - state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - if (state == QLCNIC_DEV_FAILED || (state == QLCNIC_DEV_BADBAD)) - netdev_info(netdev, "Device in FAILED state\n"); + if (test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) + netdev_info(netdev, "Device is in non-operational state\n"); break; default: diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 73e72eb83bdf..6f7f60c09f07 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -785,8 +785,6 @@ void qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter) #define QLCNIC_ENABLE_IPV4_LRO 1 #define QLCNIC_ENABLE_IPV6_LRO 2 -#define QLCNIC_NO_DEST_IPV4_CHECK (1 << 8) -#define QLCNIC_NO_DEST_IPV6_CHECK (2 << 8) int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) { @@ -806,11 +804,10 @@ int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) word = 0; if (enable) { - word = QLCNIC_ENABLE_IPV4_LRO | QLCNIC_NO_DEST_IPV4_CHECK; + word = QLCNIC_ENABLE_IPV4_LRO; if (adapter->ahw->extra_capability[0] & QLCNIC_FW_CAP2_HW_LRO_IPV6) - word |= QLCNIC_ENABLE_IPV6_LRO | - QLCNIC_NO_DEST_IPV6_CHECK; + word |= QLCNIC_ENABLE_IPV6_LRO; } req.words[0] = cpu_to_le64(word); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 272c356cf9b2..13303e7d1ed7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -146,6 +146,12 @@ struct qlcnic_mailbox_metadata { #define QLCNIC_MBX_PORT_RSP_OK 0x1a #define QLCNIC_MBX_ASYNC_EVENT BIT_15 +/* Set HW Tx ring limit for 82xx adapter. */ +#define QLCNIC_MAX_HW_TX_RINGS 8 +#define QLCNIC_MAX_HW_VNIC_TX_RINGS 4 +#define QLCNIC_MAX_TX_RINGS 8 +#define QLCNIC_MAX_SDS_RINGS 8 + struct qlcnic_pci_info; struct qlcnic_info; struct qlcnic_cmd_args; @@ -176,7 +182,7 @@ int qlcnic_82xx_set_lb_mode(struct qlcnic_adapter *, u8); void qlcnic_82xx_write_crb(struct qlcnic_adapter *, char *, loff_t, size_t); void qlcnic_82xx_read_crb(struct qlcnic_adapter *, char *, loff_t, size_t); void qlcnic_82xx_dev_request_reset(struct qlcnic_adapter *, u32); -int qlcnic_82xx_setup_intr(struct qlcnic_adapter *, u8, int); +int qlcnic_82xx_setup_intr(struct qlcnic_adapter *); irqreturn_t qlcnic_82xx_clear_legacy_intr(struct qlcnic_adapter *); int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c index 66c26cf7a2b8..e9c21e5d0ca9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c @@ -236,7 +236,7 @@ int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter) spin_lock_init(&rds_ring->lock); } - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; sds_ring->irq = adapter->msix_entries[ring].vector; sds_ring->adapter = adapter; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 897627dd1d04..0149c9495347 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -581,10 +581,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) goto drop_packet; } - if (qlcnic_check_multi_tx(adapter)) - tx_ring = &adapter->tx_ring[skb_get_queue_mapping(skb)]; - else - tx_ring = &adapter->tx_ring[0]; + tx_ring = &adapter->tx_ring[skb_get_queue_mapping(skb)]; num_txd = tx_ring->num_desc; frag_count = skb_shinfo(skb)->nr_frags + 1; @@ -607,8 +604,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) { netif_tx_start_queue(tx_ring->txq); } else { - adapter->stats.xmit_off++; - tx_ring->xmit_off++; + tx_ring->tx_stats.xmit_off++; return NETDEV_TX_BUSY; } } @@ -669,9 +665,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (adapter->drv_mac_learn) qlcnic_send_filter(adapter, first_desc, skb); - adapter->stats.txbytes += skb->len; - adapter->stats.xmitcalled++; - tx_ring->xmit_called++; + tx_ring->tx_stats.tx_bytes += skb->len; + tx_ring->tx_stats.xmit_called++; qlcnic_update_cmd_producer(tx_ring); @@ -789,6 +784,9 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, struct net_device *netdev = adapter->netdev; struct qlcnic_skb_frag *frag; + if (!spin_trylock(&adapter->tx_clean_lock)) + return 1; + sw_consumer = tx_ring->sw_consumer; hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer)); @@ -805,8 +803,7 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, PCI_DMA_TODEVICE); frag->dma = 0ULL; } - adapter->stats.xmitfinished++; - tx_ring->xmit_finished++; + tx_ring->tx_stats.xmit_finished++; dev_kfree_skb_any(buffer->skb); buffer->skb = NULL; } @@ -823,8 +820,7 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, netif_carrier_ok(netdev)) { if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) { netif_tx_wake_queue(tx_ring->txq); - adapter->stats.xmit_on++; - tx_ring->xmit_on++; + tx_ring->tx_stats.xmit_on++; } } adapter->tx_timeo_cnt = 0; @@ -844,6 +840,7 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, */ hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer)); done = (sw_consumer == hw_consumer); + spin_unlock(&adapter->tx_clean_lock); return done; } @@ -1463,18 +1460,18 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; struct qlcnic_host_tx_ring *tx_ring; - if (qlcnic_alloc_sds_rings(recv_ctx, adapter->max_sds_rings)) + if (qlcnic_alloc_sds_rings(recv_ctx, adapter->drv_sds_rings)) return -ENOMEM; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test && - (adapter->max_drv_tx_rings > 1)) { + (adapter->drv_tx_rings > QLCNIC_SINGLE_RING)) { netif_napi_add(netdev, &sds_ring->napi, qlcnic_rx_poll, NAPI_POLL_WEIGHT); } else { - if (ring == (adapter->max_sds_rings - 1)) + if (ring == (adapter->drv_sds_rings - 1)) netif_napi_add(netdev, &sds_ring->napi, qlcnic_poll, NAPI_POLL_WEIGHT); @@ -1491,7 +1488,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, } if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, NAPI_POLL_WEIGHT); @@ -1508,7 +1505,7 @@ void qlcnic_82xx_napi_del(struct qlcnic_adapter *adapter) struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; struct qlcnic_host_tx_ring *tx_ring; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; netif_napi_del(&sds_ring->napi); } @@ -1516,7 +1513,7 @@ void qlcnic_82xx_napi_del(struct qlcnic_adapter *adapter) qlcnic_free_sds_rings(adapter->recv_ctx); if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_del(&tx_ring->napi); } @@ -1535,7 +1532,7 @@ void qlcnic_82xx_napi_enable(struct qlcnic_adapter *adapter) if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC) return; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; napi_enable(&sds_ring->napi); qlcnic_enable_int(sds_ring); @@ -1544,8 +1541,8 @@ void qlcnic_82xx_napi_enable(struct qlcnic_adapter *adapter) if (qlcnic_check_multi_tx(adapter) && (adapter->flags & QLCNIC_MSIX_ENABLED) && !adapter->ahw->diag_test && - (adapter->max_drv_tx_rings > 1)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + (adapter->drv_tx_rings > QLCNIC_SINGLE_RING)) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; napi_enable(&tx_ring->napi); qlcnic_enable_tx_intr(adapter, tx_ring); @@ -1563,7 +1560,7 @@ void qlcnic_82xx_napi_disable(struct qlcnic_adapter *adapter) if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC) return; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; qlcnic_disable_int(sds_ring); napi_synchronize(&sds_ring->napi); @@ -1573,7 +1570,7 @@ void qlcnic_82xx_napi_disable(struct qlcnic_adapter *adapter) if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !adapter->ahw->diag_test && qlcnic_check_multi_tx(adapter)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; qlcnic_disable_tx_int(adapter, tx_ring); napi_synchronize(&tx_ring->napi); @@ -1911,7 +1908,7 @@ void qlcnic_83xx_napi_enable(struct qlcnic_adapter *adapter) if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC) return; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; napi_enable(&sds_ring->napi); if (adapter->flags & QLCNIC_MSIX_ENABLED) @@ -1920,7 +1917,7 @@ void qlcnic_83xx_napi_enable(struct qlcnic_adapter *adapter) if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; napi_enable(&tx_ring->napi); qlcnic_83xx_enable_tx_intr(adapter, tx_ring); @@ -1938,7 +1935,7 @@ void qlcnic_83xx_napi_disable(struct qlcnic_adapter *adapter) if (adapter->is_up != QLCNIC_ADAPTER_UP_MAGIC) return; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; if (adapter->flags & QLCNIC_MSIX_ENABLED) qlcnic_83xx_disable_intr(adapter, sds_ring); @@ -1948,7 +1945,7 @@ void qlcnic_83xx_napi_disable(struct qlcnic_adapter *adapter) if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; qlcnic_83xx_disable_tx_intr(adapter, tx_ring); napi_synchronize(&tx_ring->napi); @@ -1965,10 +1962,10 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; - if (qlcnic_alloc_sds_rings(recv_ctx, adapter->max_sds_rings)) + if (qlcnic_alloc_sds_rings(recv_ctx, adapter->drv_sds_rings)) return -ENOMEM; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; if (adapter->flags & QLCNIC_MSIX_ENABLED) { if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) @@ -1994,7 +1991,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_add(netdev, &tx_ring->napi, qlcnic_83xx_msix_tx_poll, @@ -2012,7 +2009,7 @@ void qlcnic_83xx_napi_del(struct qlcnic_adapter *adapter) struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; struct qlcnic_host_tx_ring *tx_ring; - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; netif_napi_del(&sds_ring->napi); } @@ -2021,7 +2018,7 @@ void qlcnic_83xx_napi_del(struct qlcnic_adapter *adapter) if ((adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_del(&tx_ring->napi); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index dcf4a4e7ce23..05c1eef8df13 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -548,36 +548,75 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = { .io_resume = qlcnic_82xx_io_resume, }; -static void qlcnic_get_multiq_capability(struct qlcnic_adapter *adapter) +static int qlcnic_check_multi_tx_capability(struct qlcnic_adapter *adapter) { struct qlcnic_hardware_context *ahw = adapter->ahw; - int num_tx_q; - if (ahw->msix_supported && + if (qlcnic_82xx_check(adapter) && (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_2_MULTI_TX)) { - num_tx_q = min_t(int, QLCNIC_DEF_NUM_TX_RINGS, - num_online_cpus()); - if (num_tx_q > 1) { - test_and_set_bit(__QLCNIC_MULTI_TX_UNIQUE, - &adapter->state); - adapter->max_drv_tx_rings = num_tx_q; - } + test_and_set_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state); + return 0; } else { - adapter->max_drv_tx_rings = 1; + return 1; } } +static int qlcnic_max_rings(struct qlcnic_adapter *adapter, u8 ring_cnt, + int queue_type) +{ + int num_rings, max_rings = QLCNIC_MAX_SDS_RINGS; + + if (queue_type == QLCNIC_RX_QUEUE) + max_rings = adapter->max_sds_rings; + else if (queue_type == QLCNIC_TX_QUEUE) + max_rings = adapter->max_tx_rings; + + num_rings = rounddown_pow_of_two(min_t(int, num_online_cpus(), + max_rings)); + + if (ring_cnt > num_rings) + return num_rings; + else + return ring_cnt; +} + +void qlcnic_set_tx_ring_count(struct qlcnic_adapter *adapter, u8 tx_cnt) +{ + /* 83xx adapter does not have max_tx_rings intialized in probe */ + if (adapter->max_tx_rings) + adapter->drv_tx_rings = qlcnic_max_rings(adapter, tx_cnt, + QLCNIC_TX_QUEUE); + else + adapter->drv_tx_rings = tx_cnt; + + dev_info(&adapter->pdev->dev, "Set %d Tx rings\n", + adapter->drv_tx_rings); +} + +void qlcnic_set_sds_ring_count(struct qlcnic_adapter *adapter, u8 rx_cnt) +{ + /* 83xx adapter does not have max_sds_rings intialized in probe */ + if (adapter->max_sds_rings) + adapter->drv_sds_rings = qlcnic_max_rings(adapter, rx_cnt, + QLCNIC_RX_QUEUE); + else + adapter->drv_sds_rings = rx_cnt; + + dev_info(&adapter->pdev->dev, "Set %d SDS rings\n", + adapter->drv_sds_rings); +} + int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) { struct pci_dev *pdev = adapter->pdev; - int max_tx_rings, max_sds_rings, tx_vector; + int drv_tx_rings, drv_sds_rings, tx_vector; int err = -1, i; if (adapter->flags & QLCNIC_TX_INTR_SHARED) { - max_tx_rings = 0; + drv_tx_rings = 0; tx_vector = 0; } else { - max_tx_rings = adapter->max_drv_tx_rings; + drv_tx_rings = adapter->drv_tx_rings; tx_vector = 1; } @@ -589,7 +628,7 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) return -ENOMEM; } - adapter->max_sds_rings = 1; + adapter->drv_sds_rings = QLCNIC_SINGLE_RING; adapter->flags &= ~(QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED); if (adapter->ahw->msix_supported) { @@ -602,18 +641,18 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) if (qlcnic_83xx_check(adapter)) { adapter->ahw->num_msix = num_msix; /* subtract mail box and tx ring vectors */ - adapter->max_sds_rings = num_msix - - max_tx_rings - 1; + adapter->drv_sds_rings = num_msix - + drv_tx_rings - 1; } else { adapter->ahw->num_msix = num_msix; if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test && - (adapter->max_drv_tx_rings > 1)) - max_sds_rings = num_msix - max_tx_rings; + (adapter->drv_tx_rings > 1)) + drv_sds_rings = num_msix - drv_tx_rings; else - max_sds_rings = num_msix; + drv_sds_rings = num_msix; - adapter->max_sds_rings = max_sds_rings; + adapter->drv_sds_rings = drv_sds_rings; } dev_info(&pdev->dev, "using msi-x interrupts\n"); return err; @@ -624,13 +663,13 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) if (qlcnic_83xx_check(adapter)) { if (err < (QLC_83XX_MINIMUM_VECTOR - tx_vector)) return err; - err -= (max_tx_rings + 1); + err -= drv_tx_rings + 1; num_msix = rounddown_pow_of_two(err); - num_msix += (max_tx_rings + 1); + num_msix += drv_tx_rings + 1; } else { num_msix = rounddown_pow_of_two(err); if (qlcnic_check_multi_tx(adapter)) - num_msix += max_tx_rings; + num_msix += drv_tx_rings; } if (num_msix) { @@ -683,25 +722,14 @@ static int qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter) return err; } -int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr, int txq) +int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter) { - struct qlcnic_hardware_context *ahw = adapter->ahw; int num_msix, err = 0; - if (!num_intr) - num_intr = QLCNIC_DEF_NUM_STS_DESC_RINGS; + num_msix = adapter->drv_sds_rings; - if (ahw->msix_supported) { - num_msix = rounddown_pow_of_two(min_t(int, num_online_cpus(), - num_intr)); - if (qlcnic_check_multi_tx(adapter)) { - if (txq) - adapter->max_drv_tx_rings = txq; - num_msix += adapter->max_drv_tx_rings; - } - } else { - num_msix = 1; - } + if (qlcnic_check_multi_tx(adapter)) + num_msix += adapter->drv_tx_rings; err = qlcnic_enable_msix(adapter, num_msix); if (err == -ENOMEM) @@ -1133,18 +1161,25 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter) if (err == -EIO) return err; adapter->ahw->extra_capability[0] = temp; + } else { + adapter->ahw->extra_capability[0] = 0; } + adapter->ahw->max_mac_filters = nic_info.max_mac_filters; adapter->ahw->max_mtu = nic_info.max_mtu; - /* Disable NPAR for 83XX */ - if (qlcnic_83xx_check(adapter)) - return err; - - if (adapter->ahw->capabilities & BIT_6) + if (adapter->ahw->capabilities & BIT_6) { adapter->flags |= QLCNIC_ESWITCH_ENABLED; - else + adapter->ahw->nic_mode = QLCNIC_VNIC_MODE; + adapter->max_tx_rings = QLCNIC_MAX_HW_VNIC_TX_RINGS; + adapter->max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS; + + dev_info(&adapter->pdev->dev, "vNIC mode enabled.\n"); + } else { + adapter->ahw->nic_mode = QLCNIC_DEFAULT_MODE; + adapter->max_tx_rings = QLCNIC_MAX_HW_TX_RINGS; adapter->flags &= ~QLCNIC_ESWITCH_ENABLED; + } return err; } @@ -1292,6 +1327,8 @@ qlcnic_check_eswitch_mode(struct qlcnic_adapter *adapter) "HAL Version: %d, Privileged function\n", adapter->ahw->fw_hal_version); } + } else { + adapter->ahw->nic_mode = QLCNIC_DEFAULT_MODE; } adapter->flags |= QLCNIC_ADAPTER_INITIALIZED; @@ -1551,7 +1588,7 @@ qlcnic_request_irq(struct qlcnic_adapter *adapter) if (qlcnic_82xx_check(adapter) || (qlcnic_83xx_check(adapter) && (adapter->flags & QLCNIC_MSIX_ENABLED))) { - num_sds_rings = adapter->max_sds_rings; + num_sds_rings = adapter->drv_sds_rings; for (ring = 0; ring < num_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; if (qlcnic_82xx_check(adapter) && @@ -1585,7 +1622,7 @@ qlcnic_request_irq(struct qlcnic_adapter *adapter) (adapter->flags & QLCNIC_MSIX_ENABLED) && !(adapter->flags & QLCNIC_TX_INTR_SHARED))) { handler = qlcnic_msix_tx_intr; - for (ring = 0; ring < adapter->max_drv_tx_rings; + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; snprintf(tx_ring->name, sizeof(tx_ring->name), @@ -1613,7 +1650,7 @@ qlcnic_free_irq(struct qlcnic_adapter *adapter) if (qlcnic_82xx_check(adapter) || (qlcnic_83xx_check(adapter) && (adapter->flags & QLCNIC_MSIX_ENABLED))) { - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; free_irq(sds_ring->irq, sds_ring); } @@ -1622,7 +1659,7 @@ qlcnic_free_irq(struct qlcnic_adapter *adapter) !(adapter->flags & QLCNIC_TX_INTR_SHARED)) || (qlcnic_82xx_check(adapter) && qlcnic_check_multi_tx(adapter))) { - for (ring = 0; ring < adapter->max_drv_tx_rings; + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; if (tx_ring->irq) @@ -1676,7 +1713,7 @@ int __qlcnic_up(struct qlcnic_adapter *adapter, struct net_device *netdev) adapter->ahw->linkup = 0; - if (adapter->max_sds_rings > 1) + if (adapter->drv_sds_rings > 1) qlcnic_config_rss(adapter, 1); qlcnic_config_intr_coalesce(adapter); @@ -1718,6 +1755,7 @@ void __qlcnic_down(struct qlcnic_adapter *adapter, struct net_device *netdev) if (qlcnic_sriov_vf_check(adapter)) qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc); smp_mb(); + spin_lock(&adapter->tx_clean_lock); netif_carrier_off(netdev); adapter->ahw->linkup = 0; netif_tx_disable(netdev); @@ -1736,8 +1774,9 @@ void __qlcnic_down(struct qlcnic_adapter *adapter, struct net_device *netdev) qlcnic_reset_rx_buffers_list(adapter); - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) + for (ring = 0; ring < adapter->drv_tx_rings; ring++) qlcnic_release_tx_buffers(adapter, &adapter->tx_ring[ring]); + spin_unlock(&adapter->tx_clean_lock); } /* Usage: During suspend and firmware recovery module */ @@ -1813,16 +1852,16 @@ void qlcnic_detach(struct qlcnic_adapter *adapter) adapter->is_up = 0; } -void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings) +void qlcnic_diag_free_res(struct net_device *netdev, int drv_sds_rings) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_host_sds_ring *sds_ring; - int max_tx_rings = adapter->max_drv_tx_rings; + int drv_tx_rings = adapter->drv_tx_rings; int ring; clear_bit(__QLCNIC_DEV_UP, &adapter->state); if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) { - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &adapter->recv_ctx->sds_rings[ring]; qlcnic_disable_int(sds_ring); } @@ -1833,8 +1872,8 @@ void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings) qlcnic_detach(adapter); adapter->ahw->diag_test = 0; - adapter->max_sds_rings = max_sds_rings; - adapter->max_drv_tx_rings = max_tx_rings; + adapter->drv_sds_rings = drv_sds_rings; + adapter->drv_tx_rings = drv_tx_rings; if (qlcnic_attach(adapter)) goto out; @@ -1900,10 +1939,10 @@ int qlcnic_diag_alloc_res(struct net_device *netdev, int test) qlcnic_detach(adapter); - adapter->max_sds_rings = 1; + adapter->drv_sds_rings = QLCNIC_SINGLE_RING; + adapter->drv_tx_rings = QLCNIC_SINGLE_RING; adapter->ahw->diag_test = test; adapter->ahw->linkup = 0; - adapter->max_drv_tx_rings = 1; ret = qlcnic_attach(adapter); if (ret) { @@ -1924,7 +1963,7 @@ int qlcnic_diag_alloc_res(struct net_device *netdev, int test) } if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) { - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &adapter->recv_ctx->sds_rings[ring]; qlcnic_enable_int(sds_ring); } @@ -2097,7 +2136,7 @@ void qlcnic_free_tx_rings(struct qlcnic_adapter *adapter) int ring; struct qlcnic_host_tx_ring *tx_ring; - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; if (tx_ring && tx_ring->cmd_buf_arr != NULL) { vfree(tx_ring->cmd_buf_arr); @@ -2115,14 +2154,14 @@ int qlcnic_alloc_tx_rings(struct qlcnic_adapter *adapter, struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_cmd_buffer *cmd_buf_arr; - tx_ring = kcalloc(adapter->max_drv_tx_rings, + tx_ring = kcalloc(adapter->drv_tx_rings, sizeof(struct qlcnic_host_tx_ring), GFP_KERNEL); if (tx_ring == NULL) return -ENOMEM; adapter->tx_ring = tx_ring; - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; tx_ring->num_desc = adapter->num_txd; tx_ring->txq = netdev_get_tx_queue(netdev, ring); @@ -2137,11 +2176,11 @@ int qlcnic_alloc_tx_rings(struct qlcnic_adapter *adapter, if (qlcnic_83xx_check(adapter) || (qlcnic_82xx_check(adapter) && qlcnic_check_multi_tx(adapter))) { - for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; tx_ring->adapter = adapter; if (adapter->flags & QLCNIC_MSIX_ENABLED) { - index = adapter->max_sds_rings + ring; + index = adapter->drv_sds_rings + ring; vector = adapter->msix_entries[index].vector; tx_ring->irq = vector; } @@ -2161,8 +2200,7 @@ void qlcnic_set_drv_version(struct qlcnic_adapter *adapter) else if (qlcnic_83xx_check(adapter)) fw_cmd = QLCNIC_CMD_83XX_SET_DRV_VER; - if ((ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) && - (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER)) + if (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER) qlcnic_fw_cmd_set_drv_version(adapter, fw_cmd); } @@ -2261,6 +2299,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rwlock_init(&adapter->ahw->crb_lock); mutex_init(&adapter->ahw->mem_lock); + spin_lock_init(&adapter->tx_clean_lock); INIT_LIST_HEAD(&adapter->mac_list); qlcnic_register_dcb(adapter); @@ -2275,20 +2314,21 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_maintenance_mode; } - qlcnic_get_multiq_capability(adapter); - - if ((adapter->ahw->act_pci_func > 2) && - qlcnic_check_multi_tx(adapter)) { - adapter->max_drv_tx_rings = QLCNIC_DEF_NUM_TX_RINGS; - dev_info(&adapter->pdev->dev, - "vNIC mode enabled, Set max TX rings = %d\n", - adapter->max_drv_tx_rings); + /* compute and set default and max tx/sds rings */ + if (adapter->ahw->msix_supported) { + if (qlcnic_check_multi_tx_capability(adapter) == 1) + qlcnic_set_tx_ring_count(adapter, + QLCNIC_SINGLE_RING); + else + qlcnic_set_tx_ring_count(adapter, + QLCNIC_DEF_TX_RINGS); + qlcnic_set_sds_ring_count(adapter, + QLCNIC_DEF_SDS_RINGS); + } else { + qlcnic_set_tx_ring_count(adapter, QLCNIC_SINGLE_RING); + qlcnic_set_sds_ring_count(adapter, QLCNIC_SINGLE_RING); } - if (!qlcnic_check_multi_tx(adapter)) { - clear_bit(__QLCNIC_MULTI_TX_UNIQUE, &adapter->state); - adapter->max_drv_tx_rings = 1; - } err = qlcnic_setup_idc_param(adapter); if (err) goto err_out_free_hw; @@ -2299,16 +2339,26 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (dcb && qlcnic_dcb_attach(dcb)) qlcnic_clear_dcb_ops(dcb); - } else if (qlcnic_83xx_check(adapter)) { - adapter->max_drv_tx_rings = 1; qlcnic_83xx_check_vf(adapter, ent); adapter->portnum = adapter->ahw->pci_func; err = qlcnic_83xx_init(adapter, pci_using_dac); if (err) { - dev_err(&pdev->dev, "%s: failed\n", __func__); - goto err_out_free_hw; + switch (err) { + case -ENOTRECOVERABLE: + dev_err(&pdev->dev, "Adapter initialization failed due to a faulty hardware. Please reboot\n"); + dev_err(&pdev->dev, "If reboot doesn't help, please replace the adapter with new one and return the faulty adapter for repair\n"); + goto err_out_free_hw; + case -ENOMEM: + dev_err(&pdev->dev, "Adapter initialization failed. Please reboot\n"); + goto err_out_free_hw; + default: + dev_err(&pdev->dev, "Adapter initialization failed. A reboot may be required to recover from this failure\n"); + dev_err(&pdev->dev, "If reboot does not help to recover from this failure, try a flash update of the adapter\n"); + goto err_out_maintenance_mode; + } } + if (qlcnic_sriov_vf_check(adapter)) return 0; } else { @@ -2336,7 +2386,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Device does not support MSI interrupts\n"); if (qlcnic_82xx_check(adapter)) { - err = qlcnic_setup_intr(adapter, 0, 0); + err = qlcnic_setup_intr(adapter); if (err) { dev_err(&pdev->dev, "Failed to setup interrupt\n"); goto err_out_disable_msi; @@ -2410,8 +2460,16 @@ err_out_disable_pdev: return err; err_out_maintenance_mode: + set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state); netdev->netdev_ops = &qlcnic_netdev_failed_ops; SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops); + ahw->port_type = QLCNIC_XGBE; + + if (qlcnic_83xx_check(adapter)) + adapter->tgt_status_reg = NULL; + else + ahw->board_type = QLCNIC_BRDTYPE_P3P_10G_SFP_PLUS; + err = register_netdev(netdev); if (err) { @@ -2533,12 +2591,11 @@ static int qlcnic_resume(struct pci_dev *pdev) static int qlcnic_open(struct net_device *netdev) { struct qlcnic_adapter *adapter = netdev_priv(netdev); - u32 state; int err; - state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) { - netdev_err(netdev, "%s: Device is in FAILED state\n", __func__); + if (test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) { + netdev_err(netdev, "%s: Device is in non-operational state\n", + __func__); return -EIO; } @@ -2700,24 +2757,21 @@ static void qlcnic_tx_timeout(struct net_device *netdev) QLCNIC_FORCE_FW_DUMP_KEY); } else { netdev_info(netdev, "Tx timeout, reset adapter context.\n"); - if (qlcnic_82xx_check(adapter)) { - for (ring = 0; ring < adapter->max_drv_tx_rings; - ring++) { - tx_ring = &adapter->tx_ring[ring]; - dev_info(&netdev->dev, "ring=%d\n", ring); - dev_info(&netdev->dev, "crb_intr_mask=%d\n", - readl(tx_ring->crb_intr_mask)); - dev_info(&netdev->dev, "producer=%d\n", - readl(tx_ring->crb_cmd_producer)); - dev_info(&netdev->dev, "sw_consumer = %d\n", - tx_ring->sw_consumer); - dev_info(&netdev->dev, "hw_consumer = %d\n", - le32_to_cpu(*(tx_ring->hw_consumer))); - dev_info(&netdev->dev, "xmit-on=%llu\n", - tx_ring->xmit_on); - dev_info(&netdev->dev, "xmit-off=%llu\n", - tx_ring->xmit_off); - } + for (ring = 0; ring < adapter->drv_tx_rings; ring++) { + tx_ring = &adapter->tx_ring[ring]; + netdev_info(netdev, "Tx ring=%d\n", ring); + netdev_info(netdev, + "crb_intr_mask=%d, producer=%d, sw_consumer=%d, hw_consumer=%d\n", + readl(tx_ring->crb_intr_mask), + readl(tx_ring->crb_cmd_producer), + tx_ring->sw_consumer, + le32_to_cpu(*(tx_ring->hw_consumer))); + netdev_info(netdev, + "xmit_finished=%llu, xmit_called=%llu, xmit_on=%llu, xmit_off=%llu\n", + tx_ring->tx_stats.xmit_finished, + tx_ring->tx_stats.xmit_called, + tx_ring->tx_stats.xmit_on, + tx_ring->tx_stats.xmit_off); } adapter->ahw->reset_context = 1; } @@ -2831,7 +2885,7 @@ static void qlcnic_poll_controller(struct net_device *netdev) struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; disable_irq(adapter->irq); - for (ring = 0; ring < adapter->max_sds_rings; ring++) { + for (ring = 0; ring < adapter->drv_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; qlcnic_intr(adapter->irq, sds_ring); } @@ -3251,8 +3305,9 @@ void qlcnic_82xx_dev_request_reset(struct qlcnic_adapter *adapter, u32 key) return; state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) { - netdev_err(adapter->netdev, "%s: Device is in FAILED state\n", + + if (test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) { + netdev_err(adapter->netdev, "%s: Device is in non-operational state\n", __func__); qlcnic_api_unlock(adapter); @@ -3497,7 +3552,7 @@ static int qlcnic_attach_func(struct pci_dev *pdev) qlcnic_clr_drv_state(adapter); kfree(adapter->msix_entries); adapter->msix_entries = NULL; - err = qlcnic_setup_intr(adapter, 0, 0); + err = qlcnic_setup_intr(adapter); if (err) { kfree(adapter->msix_entries); @@ -3642,130 +3697,90 @@ qlcnicvf_start_firmware(struct qlcnic_adapter *adapter) return err; } -int qlcnic_validate_max_tx_rings(struct qlcnic_adapter *adapter, u32 txq) +int qlcnic_validate_rings(struct qlcnic_adapter *adapter, __u32 ring_cnt, + int queue_type) { struct net_device *netdev = adapter->netdev; - u8 max_hw = QLCNIC_MAX_TX_RINGS; - u32 max_allowed; + u8 max_hw_rings = 0; + char buf[8]; + int cur_rings; - if (!qlcnic_use_msi_x && !qlcnic_use_msi) { - netdev_err(netdev, "No Multi TX-Q support in INT-x mode\n"); - return -EINVAL; + if (queue_type == QLCNIC_RX_QUEUE) { + max_hw_rings = adapter->max_sds_rings; + cur_rings = adapter->drv_sds_rings; + strcpy(buf, "SDS"); + } else if (queue_type == QLCNIC_TX_QUEUE) { + max_hw_rings = adapter->max_tx_rings; + cur_rings = adapter->drv_tx_rings; + strcpy(buf, "Tx"); } - if (!qlcnic_check_multi_tx(adapter)) { - netdev_err(netdev, "No Multi TX-Q support\n"); + if (!qlcnic_use_msi_x && !qlcnic_use_msi) { + netdev_err(netdev, "No RSS/TSS support in INT-x mode\n"); return -EINVAL; } - if (txq > QLCNIC_MAX_TX_RINGS) { - netdev_err(netdev, "Invalid ring count\n"); + if (adapter->flags & QLCNIC_MSI_ENABLED) { + netdev_err(netdev, "No RSS/TSS support in MSI mode\n"); return -EINVAL; } - max_allowed = rounddown_pow_of_two(min_t(int, max_hw, - num_online_cpus())); - if ((txq > max_allowed) || !is_power_of_2(txq)) { - if (!is_power_of_2(txq)) - netdev_err(netdev, - "TX queue should be a power of 2\n"); - if (txq > num_online_cpus()) - netdev_err(netdev, - "Tx queue should not be higher than [%u], number of online CPUs in the system\n", - num_online_cpus()); - netdev_err(netdev, "Unable to configure %u Tx rings\n", txq); + if (ring_cnt < 2) { + netdev_err(netdev, + "%s rings value should not be lower than 2\n", buf); return -EINVAL; } - return 0; -} - -int qlcnic_validate_max_rss(struct qlcnic_adapter *adapter, - __u32 val) -{ - struct net_device *netdev = adapter->netdev; - u8 max_hw = adapter->ahw->max_rx_ques; - u32 max_allowed; - - if (!qlcnic_use_msi_x && !qlcnic_use_msi) { - netdev_err(netdev, "No RSS support in INT-x mode\n"); + if (!is_power_of_2(ring_cnt)) { + netdev_err(netdev, "%s rings value should be a power of 2\n", + buf); return -EINVAL; } - if (val > QLCNIC_MAX_SDS_RINGS) { - netdev_err(netdev, "RSS value should not be higher than %u\n", - QLCNIC_MAX_SDS_RINGS); - return -EINVAL; + if (qlcnic_82xx_check(adapter) && (queue_type == QLCNIC_TX_QUEUE) && + !qlcnic_check_multi_tx(adapter)) { + netdev_err(netdev, "No Multi Tx queue support\n"); + return -EINVAL; } - max_allowed = rounddown_pow_of_two(min_t(int, max_hw, - num_online_cpus())); - if ((val > max_allowed) || (val < 2) || !is_power_of_2(val)) { - if (!is_power_of_2(val)) - netdev_err(netdev, "RSS value should be a power of 2\n"); - - if (val < 2) - netdev_err(netdev, "RSS value should not be lower than 2\n"); - - if (val > max_hw) - netdev_err(netdev, - "RSS value should not be higher than[%u], the max RSS rings supported by the adapter\n", - max_hw); - - if (val > num_online_cpus()) - netdev_err(netdev, - "RSS value should not be higher than[%u], number of online CPUs in the system\n", - num_online_cpus()); - - netdev_err(netdev, "Unable to configure %u RSS rings\n", val); - + if (ring_cnt > num_online_cpus()) { + netdev_err(netdev, + "%s value[%u] should not be higher than, number of online CPUs\n", + buf, num_online_cpus()); return -EINVAL; } + return 0; } -int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, int txq) +int qlcnic_setup_rings(struct qlcnic_adapter *adapter, u8 rx_cnt, u8 tx_cnt) { - int err; struct net_device *netdev = adapter->netdev; - int num_msix; + int err; if (test_bit(__QLCNIC_RESETTING, &adapter->state)) return -EBUSY; - if (qlcnic_82xx_check(adapter) && !qlcnic_use_msi_x && - !qlcnic_use_msi) { - netdev_err(netdev, "No RSS support in INT-x mode\n"); - return -EINVAL; - } - netif_device_detach(netdev); if (netif_running(netdev)) __qlcnic_down(adapter, netdev); qlcnic_detach(adapter); - if (qlcnic_82xx_check(adapter)) { - if (txq != 0) - adapter->max_drv_tx_rings = txq; - - if (qlcnic_check_multi_tx(adapter) && - (txq > adapter->max_drv_tx_rings)) - num_msix = adapter->max_drv_tx_rings; - else - num_msix = data; - } - if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_free_mbx_intr(adapter); qlcnic_83xx_enable_mbx_poll(adapter); } - netif_set_real_num_tx_queues(netdev, adapter->max_drv_tx_rings); - qlcnic_teardown_intr(adapter); - err = qlcnic_setup_intr(adapter, data, txq); + /* compute and set default and max tx/sds rings */ + qlcnic_set_tx_ring_count(adapter, tx_cnt); + qlcnic_set_sds_ring_count(adapter, rx_cnt); + + netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); + + err = qlcnic_setup_intr(adapter); if (err) { kfree(adapter->msix_entries); netdev_err(netdev, "failed to setup interrupt\n"); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 8b96e29df30f..21a4b274d2e4 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -508,7 +508,11 @@ static int qlcnic_sriov_setup_vf(struct qlcnic_adapter *adapter, dev_warn(&adapter->pdev->dev, "Device does not support MSI interrupts\n"); - err = qlcnic_setup_intr(adapter, 1, 0); + /* compute and set default and max tx/sds rings */ + qlcnic_set_tx_ring_count(adapter, QLCNIC_SINGLE_RING); + qlcnic_set_sds_ring_count(adapter, QLCNIC_SINGLE_RING); + + err = qlcnic_setup_intr(adapter); if (err) { dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n"); goto err_out_disable_msi; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 019f4377307f..1a9f8a400e50 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -156,7 +156,7 @@ static int qlcnic_82xx_store_beacon(struct qlcnic_adapter *adapter, const char *buf, size_t len) { struct qlcnic_hardware_context *ahw = adapter->ahw; - int err, max_sds_rings = adapter->max_sds_rings; + int err, drv_sds_rings = adapter->drv_sds_rings; u16 beacon; u8 h_beacon_state, b_state, b_rate; @@ -211,7 +211,7 @@ static int qlcnic_82xx_store_beacon(struct qlcnic_adapter *adapter, } if (test_and_clear_bit(__QLCNIC_DIAG_RES_ALLOC, &adapter->state)) - qlcnic_diag_free_res(adapter->netdev, max_sds_rings); + qlcnic_diag_free_res(adapter->netdev, drv_sds_rings); out: if (!ahw->beacon_state) @@ -1272,7 +1272,6 @@ void qlcnic_remove_sysfs_entries(struct qlcnic_adapter *adapter) void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) { struct device *dev = &adapter->pdev->dev; - u32 state; if (device_create_bin_file(dev, &bin_attr_port_stats)) dev_info(dev, "failed to create port stats sysfs entry"); @@ -1286,8 +1285,7 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) if (device_create_bin_file(dev, &bin_attr_mem)) dev_info(dev, "failed to create mem sysfs entry\n"); - state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) + if (test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) return; if (device_create_bin_file(dev, &bin_attr_pci_config)) @@ -1313,7 +1311,6 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter) { struct device *dev = &adapter->pdev->dev; - u32 state; device_remove_bin_file(dev, &bin_attr_port_stats); @@ -1323,8 +1320,7 @@ void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter) device_remove_bin_file(dev, &bin_attr_crb); device_remove_bin_file(dev, &bin_attr_mem); - state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) + if (test_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state)) return; device_remove_bin_file(dev, &bin_attr_pci_config); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index eaf11e47334f..d256ce19d4de 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2663,6 +2663,12 @@ static int sh_eth_drv_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_resume(&pdev->dev); + if (!pd) { + dev_err(&pdev->dev, "no platform data\n"); + ret = -EINVAL; + goto out_release; + } + /* get PHY ID */ mdp->phy_id = pd->phy; mdp->phy_interface = pd->phy_interface; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index aac22a1e85b8..b14a717ac3e8 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -141,6 +141,8 @@ struct efx_special_buffer { * @len: Length of this fragment. * This field is zero when the queue slot is empty. * @unmap_len: Length of this fragment to unmap + * @dma_offset: Offset of @dma_addr from the address of the backing DMA mapping. + * Only valid if @unmap_len != 0. */ struct efx_tx_buffer { union { @@ -154,6 +156,7 @@ struct efx_tx_buffer { unsigned short flags; unsigned short len; unsigned short unmap_len; + unsigned short dma_offset; }; #define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ #define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 282692c48e6b..c49d1fb16965 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -65,8 +65,7 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, { if (buffer->unmap_len) { struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - - buffer->unmap_len); + dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); @@ -414,6 +413,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Transfer ownership of the unmapping to the final buffer */ buffer->flags = EFX_TX_BUF_CONT | dma_flags; buffer->unmap_len = unmap_len; + buffer->dma_offset = buffer->dma_addr - unmap_addr; unmap_len = 0; /* Get address and size of next fragment */ @@ -980,6 +980,7 @@ static int efx_tso_put_header(struct efx_tx_queue *tx_queue, return -ENOMEM; } buffer->unmap_len = buffer->len; + buffer->dma_offset = 0; buffer->flags |= EFX_TX_BUF_MAP_SINGLE; } @@ -1121,6 +1122,7 @@ static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, if (st->in_len == 0) { /* Transfer ownership of the DMA mapping */ buffer->unmap_len = st->unmap_len; + buffer->dma_offset = buffer->unmap_len - buffer->len; buffer->flags |= st->dma_flags; st->unmap_len = 0; } @@ -1219,6 +1221,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, if (is_last) { buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; buffer->unmap_len = st->header_unmap_len; + buffer->dma_offset = 0; /* Ensure we only unmap them once in case of a * later DMA mapping error and rollback */ diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 8c5c24a16f8a..8ae1f8a7bf38 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -91,9 +91,9 @@ static int rx_copybreak; /* These identify the driver base version and may not be removed. */ static char version[] = -DRV_NAME ".c:v1.11 1/7/2001 Written by Donald Becker <becker@scyld.com>\n"; +DRV_NAME ".c:v1.11 1/7/2001 Written by Donald Becker <becker@scyld.com>"; static char version2[] = -" (unofficial 2.4.x kernel port, version " DRV_VERSION ", " DRV_RELDATE ")\n"; +" (unofficial 2.4.x kernel port, version " DRV_VERSION ", " DRV_RELDATE ")"; MODULE_AUTHOR("Donald Becker <becker@scyld.com>"); MODULE_DESCRIPTION("SMC 83c170 EPIC series Ethernet driver"); @@ -332,9 +332,7 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* when built into the kernel, we only print version if device is found */ #ifndef MODULE - static int printed_version; - if (!printed_version++) - printk(KERN_INFO "%s%s", version, version2); + pr_info_once("%s%s\n", version, version2); #endif card_idx++; @@ -423,9 +421,9 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(er16(LAN0 + i*4)); if (debug > 2) { - dev_printk(KERN_DEBUG, &pdev->dev, "EEPROM contents:\n"); + dev_dbg(&pdev->dev, "EEPROM contents:\n"); for (i = 0; i < 64; i++) - printk(" %4.4x%s", read_eeprom(ep, i), + pr_cont(" %4.4x%s", read_eeprom(ep, i), i % 16 == 15 ? "\n" : ""); } @@ -490,10 +488,10 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto err_out_unmap_rx; - printk(KERN_INFO "%s: %s at %lx, IRQ %d, %pM\n", - dev->name, pci_id_tbl[chip_idx].name, - (long)pci_resource_start(pdev, EPIC_BAR), pdev->irq, - dev->dev_addr); + netdev_info(dev, "%s at %lx, IRQ %d, %pM\n", + pci_id_tbl[chip_idx].name, + (long)pci_resource_start(pdev, EPIC_BAR), pdev->irq, + dev->dev_addr); out: return ret; @@ -703,9 +701,8 @@ static int epic_open(struct net_device *dev) mdio_write(dev, ep->phys[0], MII_BMCR, media2miictl[dev->if_port&15]); if (dev->if_port == 1) { if (debug > 1) - printk(KERN_INFO "%s: Using the 10base2 transceiver, MII " - "status %4.4x.\n", - dev->name, mdio_read(dev, ep->phys[0], MII_BMSR)); + netdev_info(dev, "Using the 10base2 transceiver, MII status %4.4x.\n", + mdio_read(dev, ep->phys[0], MII_BMSR)); } } else { int mii_lpa = mdio_read(dev, ep->phys[0], MII_LPA); @@ -715,10 +712,10 @@ static int epic_open(struct net_device *dev) else if (! (mii_lpa & LPA_LPACK)) mdio_write(dev, ep->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART); if (debug > 1) - printk(KERN_INFO "%s: Setting %s-duplex based on MII xcvr %d" - " register read of %4.4x.\n", dev->name, - ep->mii.full_duplex ? "full" : "half", - ep->phys[0], mii_lpa); + netdev_info(dev, "Setting %s-duplex based on MII xcvr %d register read of %4.4x.\n", + ep->mii.full_duplex ? "full" + : "half", + ep->phys[0], mii_lpa); } } @@ -738,10 +735,9 @@ static int epic_open(struct net_device *dev) TxUnderrun); if (debug > 1) { - printk(KERN_DEBUG "%s: epic_open() ioaddr %p IRQ %d " - "status %4.4x %s-duplex.\n", - dev->name, ioaddr, irq, er32(GENCTL), - ep->mii.full_duplex ? "full" : "half"); + netdev_dbg(dev, "epic_open() ioaddr %p IRQ %d status %4.4x %s-duplex.\n", + ioaddr, irq, er32(GENCTL), + ep->mii.full_duplex ? "full" : "half"); } /* Set the timer to switch to check for link beat and perhaps switch @@ -790,8 +786,8 @@ static void epic_restart(struct net_device *dev) /* Soft reset the chip. */ ew32(GENCTL, 0x4001); - printk(KERN_DEBUG "%s: Restarting the EPIC chip, Rx %d/%d Tx %d/%d.\n", - dev->name, ep->cur_rx, ep->dirty_rx, ep->dirty_tx, ep->cur_tx); + netdev_dbg(dev, "Restarting the EPIC chip, Rx %d/%d Tx %d/%d.\n", + ep->cur_rx, ep->dirty_rx, ep->dirty_tx, ep->cur_tx); udelay(1); /* This magic is documented in SMSC app note 7.15 */ @@ -827,9 +823,8 @@ static void epic_restart(struct net_device *dev) ((ep->chip_flags & TYPE2_INTR) ? PCIBusErr175 : PCIBusErr170) | TxUnderrun); - printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x" - " interrupt %4.4x.\n", - dev->name, er32(COMMAND), er32(GENCTL), er32(INTSTAT)); + netdev_dbg(dev, "epic_restart() done, cmd status %4.4x, ctl %4.4x interrupt %4.4x.\n", + er32(COMMAND), er32(GENCTL), er32(INTSTAT)); } static void check_media(struct net_device *dev) @@ -846,9 +841,9 @@ static void check_media(struct net_device *dev) return; if (ep->mii.full_duplex != duplex) { ep->mii.full_duplex = duplex; - printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d link" - " partner capability of %4.4x.\n", dev->name, - ep->mii.full_duplex ? "full" : "half", ep->phys[0], mii_lpa); + netdev_info(dev, "Setting %s-duplex based on MII #%d link partner capability of %4.4x.\n", + ep->mii.full_duplex ? "full" : "half", + ep->phys[0], mii_lpa); ew32(TxCtrl, ep->mii.full_duplex ? 0x7F : 0x79); } } @@ -861,11 +856,10 @@ static void epic_timer(unsigned long data) int next_tick = 5*HZ; if (debug > 3) { - printk(KERN_DEBUG "%s: Media monitor tick, Tx status %8.8x.\n", - dev->name, er32(TxSTAT)); - printk(KERN_DEBUG "%s: Other registers are IntMask %4.4x " - "IntStatus %4.4x RxStatus %4.4x.\n", dev->name, - er32(INTMASK), er32(INTSTAT), er32(RxSTAT)); + netdev_dbg(dev, "Media monitor tick, Tx status %8.8x.\n", + er32(TxSTAT)); + netdev_dbg(dev, "Other registers are IntMask %4.4x IntStatus %4.4x RxStatus %4.4x.\n", + er32(INTMASK), er32(INTSTAT), er32(RxSTAT)); } check_media(dev); @@ -880,11 +874,11 @@ static void epic_tx_timeout(struct net_device *dev) void __iomem *ioaddr = ep->ioaddr; if (debug > 0) { - printk(KERN_WARNING "%s: Transmit timeout using MII device, " - "Tx status %4.4x.\n", dev->name, er16(TxSTAT)); + netdev_warn(dev, "Transmit timeout using MII device, Tx status %4.4x.\n", + er16(TxSTAT)); if (debug > 1) { - printk(KERN_DEBUG "%s: Tx indices: dirty_tx %d, cur_tx %d.\n", - dev->name, ep->dirty_tx, ep->cur_tx); + netdev_dbg(dev, "Tx indices: dirty_tx %d, cur_tx %d.\n", + ep->dirty_tx, ep->cur_tx); } } if (er16(TxSTAT) & 0x10) { /* Tx FIFO underflow. */ @@ -994,9 +988,8 @@ static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev) ew32(COMMAND, TxQueued); if (debug > 4) - printk(KERN_DEBUG "%s: Queued Tx packet size %d to slot %d, " - "flag %2.2x Tx status %8.8x.\n", dev->name, skb->len, - entry, ctrl_word, er32(TxSTAT)); + netdev_dbg(dev, "Queued Tx packet size %d to slot %d, flag %2.2x Tx status %8.8x.\n", + skb->len, entry, ctrl_word, er32(TxSTAT)); return NETDEV_TX_OK; } @@ -1009,8 +1002,8 @@ static void epic_tx_error(struct net_device *dev, struct epic_private *ep, #ifndef final_version /* There was an major error, log it. */ if (debug > 1) - printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", - dev->name, status); + netdev_dbg(dev, "Transmit error, Tx status %8.8x.\n", + status); #endif stats->tx_errors++; if (status & 0x1050) @@ -1057,9 +1050,8 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep) #ifndef final_version if (cur_tx - dirty_tx > TX_RING_SIZE) { - printk(KERN_WARNING - "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n", - dev->name, dirty_tx, cur_tx, ep->tx_full); + netdev_warn(dev, "Out-of-sync dirty pointer, %d vs. %d, full=%d.\n", + dirty_tx, cur_tx, ep->tx_full); dirty_tx += TX_RING_SIZE; } #endif @@ -1086,8 +1078,8 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance) ew32(INTSTAT, status & EpicNormalEvent); if (debug > 4) { - printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new " - "intstat=%#8.8x.\n", dev->name, status, er32(INTSTAT)); + netdev_dbg(dev, "Interrupt, status=%#8.8x new intstat=%#8.8x.\n", + status, er32(INTSTAT)); } if ((status & IntrSummary) == 0) @@ -1125,8 +1117,8 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance) ew32(COMMAND, RestartTx); } if (status & PCIBusErr170) { - printk(KERN_ERR "%s: PCI Bus Error! status %4.4x.\n", - dev->name, status); + netdev_err(dev, "PCI Bus Error! status %4.4x.\n", + status); epic_pause(dev); epic_restart(dev); } @@ -1136,8 +1128,8 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance) out: if (debug > 3) { - printk(KERN_DEBUG "%s: exit interrupt, intr_status=%#4.4x.\n", - dev->name, status); + netdev_dbg(dev, "exit interrupt, intr_status=%#4.4x.\n", + status); } return IRQ_RETVAL(handled); @@ -1151,7 +1143,7 @@ static int epic_rx(struct net_device *dev, int budget) int work_done = 0; if (debug > 4) - printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry, + netdev_dbg(dev, " In epic_rx(), entry %d %8.8x.\n", entry, ep->rx_ring[entry].rxstatus); if (rx_work_limit > budget) @@ -1162,16 +1154,17 @@ static int epic_rx(struct net_device *dev, int budget) int status = ep->rx_ring[entry].rxstatus; if (debug > 4) - printk(KERN_DEBUG " epic_rx() status was %8.8x.\n", status); + netdev_dbg(dev, " epic_rx() status was %8.8x.\n", + status); if (--rx_work_limit < 0) break; if (status & 0x2006) { if (debug > 2) - printk(KERN_DEBUG "%s: epic_rx() error status was %8.8x.\n", - dev->name, status); + netdev_dbg(dev, "epic_rx() error status was %8.8x.\n", + status); if (status & 0x2000) { - printk(KERN_WARNING "%s: Oversized Ethernet frame spanned " - "multiple buffers, status %4.4x!\n", dev->name, status); + netdev_warn(dev, "Oversized Ethernet frame spanned multiple buffers, status %4.4x!\n", + status); dev->stats.rx_length_errors++; } else if (status & 0x0006) /* Rx Frame errors are counted in hardware. */ @@ -1183,9 +1176,8 @@ static int epic_rx(struct net_device *dev, int budget) struct sk_buff *skb; if (pkt_len > PKT_BUF_SZ - 4) { - printk(KERN_ERR "%s: Oversized Ethernet frame, status %x " - "%d bytes.\n", - dev->name, status, pkt_len); + netdev_err(dev, "Oversized Ethernet frame, status %x %d bytes.\n", + status, pkt_len); pkt_len = 1514; } /* Check if the packet is long enough to accept without copying @@ -1305,8 +1297,8 @@ static int epic_close(struct net_device *dev) napi_disable(&ep->napi); if (debug > 1) - printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", - dev->name, er32(INTSTAT)); + netdev_dbg(dev, "Shutting down ethercard, status was %2.2x.\n", + er32(INTSTAT)); del_timer_sync(&ep->timer); @@ -1324,7 +1316,7 @@ static int epic_close(struct net_device *dev) ep->rx_ring[i].buflength = 0; if (skb) { pci_unmap_single(pdev, ep->rx_ring[i].bufaddr, - ep->rx_buf_sz, PCI_DMA_FROMDEVICE); + ep->rx_buf_sz, PCI_DMA_FROMDEVICE); dev_kfree_skb(skb); } ep->rx_ring[i].bufaddr = 0xBADF00D0; /* An invalid address. */ @@ -1587,8 +1579,7 @@ static int __init epic_init (void) { /* when a module, this is printed whether or not devices are found in probe */ #ifdef MODULE - printk (KERN_INFO "%s%s", - version, version2); + pr_info("%s%s\n", version, version2); #endif return pci_register_driver(&epic_driver); diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index afe01c4088a3..0f096a890059 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -106,16 +106,16 @@ MODULE_ALIAS("platform:smc911x"); #define POWER_DOWN 1 #if SMC_DEBUG > 0 -#define DBG(n, args...) \ +#define DBG(n, dev, args...) \ do { \ if (SMC_DEBUG & (n)) \ - printk(args); \ + netdev_dbg(dev, args); \ } while (0) -#define PRINTK(args...) printk(args) +#define PRINTK(dev, args...) netdev_info(dev, args) #else -#define DBG(n, args...) do { } while (0) -#define PRINTK(args...) printk(KERN_DEBUG args) +#define DBG(n, dev, args...) do { } while (0) +#define PRINTK(dev, args...) netdev_dbg(dev, args) #endif #if SMC_DEBUG_PKTS > 0 @@ -130,21 +130,23 @@ static void PRINT_PKT(u_char *buf, int length) for (i = 0; i < lines ; i ++) { int cur; + printk(KERN_DEBUG); for (cur = 0; cur < 8; cur++) { u_char a, b; a = *buf++; b = *buf++; - printk("%02x%02x ", a, b); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); } + printk(KERN_DEBUG); for (i = 0; i < remainder/2 ; i++) { u_char a, b; a = *buf++; b = *buf++; - printk("%02x%02x ", a, b); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); } #else #define PRINT_PKT(x...) do { } while (0) @@ -176,7 +178,7 @@ static void smc911x_reset(struct net_device *dev) unsigned int reg, timeout=0, resets=1, irq_cfg; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); /* Take out of PM setting first */ if ((SMC_GET_PMT_CTRL(lp) & PMT_CTRL_READY_) == 0) { @@ -188,7 +190,7 @@ static void smc911x_reset(struct net_device *dev) reg = SMC_GET_PMT_CTRL(lp) & PMT_CTRL_READY_; } while (--timeout && !reg); if (timeout == 0) { - PRINTK("%s: smc911x_reset timeout waiting for PM restore\n", dev->name); + PRINTK(dev, "smc911x_reset timeout waiting for PM restore\n"); return; } } @@ -206,14 +208,14 @@ static void smc911x_reset(struct net_device *dev) reg = SMC_GET_HW_CFG(lp); /* If chip indicates reset timeout then try again */ if (reg & HW_CFG_SRST_TO_) { - PRINTK("%s: chip reset timeout, retrying...\n", dev->name); + PRINTK(dev, "chip reset timeout, retrying...\n"); resets++; break; } } while (--timeout && (reg & HW_CFG_SRST_)); } if (timeout == 0) { - PRINTK("%s: smc911x_reset timeout waiting for reset\n", dev->name); + PRINTK(dev, "smc911x_reset timeout waiting for reset\n"); return; } @@ -223,7 +225,7 @@ static void smc911x_reset(struct net_device *dev) udelay(10); if (timeout == 0){ - PRINTK("%s: smc911x_reset timeout waiting for EEPROM busy\n", dev->name); + PRINTK(dev, "smc911x_reset timeout waiting for EEPROM busy\n"); return; } @@ -270,7 +272,7 @@ static void smc911x_enable(struct net_device *dev) unsigned mask, cfg, cr; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); spin_lock_irqsave(&lp->lock, flags); @@ -296,7 +298,7 @@ static void smc911x_enable(struct net_device *dev) /* Turn on receiver and enable RX */ if (cr & MAC_CR_RXEN_) - DBG(SMC_DEBUG_RX, "%s: Receiver already enabled\n", dev->name); + DBG(SMC_DEBUG_RX, dev, "Receiver already enabled\n"); SMC_SET_MAC_CR(lp, cr | MAC_CR_RXEN_); @@ -327,7 +329,7 @@ static void smc911x_shutdown(struct net_device *dev) unsigned cr; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", CARDNAME, __func__); + DBG(SMC_DEBUG_FUNC, dev, "%s: --> %s\n", CARDNAME, __func__); /* Disable IRQ's */ SMC_SET_INT_EN(lp, 0); @@ -346,7 +348,8 @@ static inline void smc911x_drop_pkt(struct net_device *dev) struct smc911x_local *lp = netdev_priv(dev); unsigned int fifo_count, timeout, reg; - DBG(SMC_DEBUG_FUNC | SMC_DEBUG_RX, "%s: --> %s\n", CARDNAME, __func__); + DBG(SMC_DEBUG_FUNC | SMC_DEBUG_RX, dev, "%s: --> %s\n", + CARDNAME, __func__); fifo_count = SMC_GET_RX_FIFO_INF(lp) & 0xFFFF; if (fifo_count <= 4) { /* Manually dump the packet data */ @@ -361,7 +364,7 @@ static inline void smc911x_drop_pkt(struct net_device *dev) reg = SMC_GET_RX_DP_CTRL(lp) & RX_DP_CTRL_FFWD_BUSY_; } while (--timeout && reg); if (timeout == 0) { - PRINTK("%s: timeout waiting for RX fast forward\n", dev->name); + PRINTK(dev, "timeout waiting for RX fast forward\n"); } } } @@ -379,11 +382,11 @@ static inline void smc911x_rcv(struct net_device *dev) struct sk_buff *skb; unsigned char *data; - DBG(SMC_DEBUG_FUNC | SMC_DEBUG_RX, "%s: --> %s\n", - dev->name, __func__); + DBG(SMC_DEBUG_FUNC | SMC_DEBUG_RX, dev, "--> %s\n", + __func__); status = SMC_GET_RX_STS_FIFO(lp); - DBG(SMC_DEBUG_RX, "%s: Rx pkt len %d status 0x%08x\n", - dev->name, (status & 0x3fff0000) >> 16, status & 0xc000ffff); + DBG(SMC_DEBUG_RX, dev, "Rx pkt len %d status 0x%08x\n", + (status & 0x3fff0000) >> 16, status & 0xc000ffff); pkt_len = (status & RX_STS_PKT_LEN_) >> 16; if (status & RX_STS_ES_) { /* Deal with a bad packet */ @@ -403,8 +406,7 @@ static inline void smc911x_rcv(struct net_device *dev) /* Alloc a buffer with extra room for DMA alignment */ skb = netdev_alloc_skb(dev, pkt_len+32); if (unlikely(skb == NULL)) { - PRINTK( "%s: Low memory, rcvd packet dropped.\n", - dev->name); + PRINTK(dev, "Low memory, rcvd packet dropped.\n"); dev->stats.rx_dropped++; smc911x_drop_pkt(dev); return; @@ -422,8 +424,8 @@ static inline void smc911x_rcv(struct net_device *dev) /* Lower the FIFO threshold if possible */ fifo = SMC_GET_FIFO_INT(lp); if (fifo & 0xFF) fifo--; - DBG(SMC_DEBUG_RX, "%s: Setting RX stat FIFO threshold to %d\n", - dev->name, fifo & 0xff); + DBG(SMC_DEBUG_RX, dev, "Setting RX stat FIFO threshold to %d\n", + fifo & 0xff); SMC_SET_FIFO_INT(lp, fifo); /* Setup RX DMA */ SMC_SET_RX_CFG(lp, RX_CFG_RX_END_ALGN16_ | ((2<<8) & RX_CFG_RXDOFF_)); @@ -436,7 +438,7 @@ static inline void smc911x_rcv(struct net_device *dev) SMC_SET_RX_CFG(lp, RX_CFG_RX_END_ALGN4_ | ((2<<8) & RX_CFG_RXDOFF_)); SMC_PULL_DATA(lp, data, pkt_len+2+3); - DBG(SMC_DEBUG_PKTS, "%s: Received packet\n", dev->name); + DBG(SMC_DEBUG_PKTS, dev, "Received packet\n"); PRINT_PKT(data, ((pkt_len - 4) <= 64) ? pkt_len - 4 : 64); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); @@ -456,7 +458,7 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) unsigned int cmdA, cmdB, len; unsigned char *buf; - DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, dev, "--> %s\n", __func__); BUG_ON(lp->pending_tx_skb == NULL); skb = lp->pending_tx_skb; @@ -481,12 +483,12 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) /* tag is packet length so we can use this in stats update later */ cmdB = (skb->len << 16) | (skb->len & 0x7FF); - DBG(SMC_DEBUG_TX, "%s: TX PKT LENGTH 0x%04x (%d) BUF 0x%p CMDA 0x%08x CMDB 0x%08x\n", - dev->name, len, len, buf, cmdA, cmdB); + DBG(SMC_DEBUG_TX, dev, "TX PKT LENGTH 0x%04x (%d) BUF 0x%p CMDA 0x%08x CMDB 0x%08x\n", + len, len, buf, cmdA, cmdB); SMC_SET_TX_FIFO(lp, cmdA); SMC_SET_TX_FIFO(lp, cmdB); - DBG(SMC_DEBUG_PKTS, "%s: Transmitted packet\n", dev->name); + DBG(SMC_DEBUG_PKTS, dev, "Transmitted packet\n"); PRINT_PKT(buf, len <= 64 ? len : 64); /* Send pkt via PIO or DMA */ @@ -517,20 +519,20 @@ static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int free; unsigned long flags; - DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, "%s: --> %s\n", - dev->name, __func__); + DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, dev, "--> %s\n", + __func__); spin_lock_irqsave(&lp->lock, flags); BUG_ON(lp->pending_tx_skb != NULL); free = SMC_GET_TX_FIFO_INF(lp) & TX_FIFO_INF_TDFREE_; - DBG(SMC_DEBUG_TX, "%s: TX free space %d\n", dev->name, free); + DBG(SMC_DEBUG_TX, dev, "TX free space %d\n", free); /* Turn off the flow when running out of space in FIFO */ if (free <= SMC911X_TX_FIFO_LOW_THRESHOLD) { - DBG(SMC_DEBUG_TX, "%s: Disabling data flow due to low FIFO space (%d)\n", - dev->name, free); + DBG(SMC_DEBUG_TX, dev, "Disabling data flow due to low FIFO space (%d)\n", + free); /* Reenable when at least 1 packet of size MTU present */ SMC_SET_FIFO_TDA(lp, (SMC911X_TX_FIFO_LOW_THRESHOLD)/64); lp->tx_throttle = 1; @@ -545,8 +547,8 @@ static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * End padding 15 bytes */ if (unlikely(free < (skb->len + 8 + 15 + 15))) { - printk("%s: No Tx free space %d < %d\n", - dev->name, free, skb->len); + netdev_warn(dev, "No Tx free space %d < %d\n", + free, skb->len); lp->pending_tx_skb = NULL; dev->stats.tx_errors++; dev->stats.tx_dropped++; @@ -561,13 +563,13 @@ static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * the DMA IRQ starts it */ if (lp->txdma_active) { - DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, "%s: Tx DMA running, deferring packet\n", dev->name); + DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "Tx DMA running, deferring packet\n"); lp->pending_tx_skb = skb; netif_stop_queue(dev); spin_unlock_irqrestore(&lp->lock, flags); return NETDEV_TX_OK; } else { - DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, "%s: Activating Tx DMA\n", dev->name); + DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "Activating Tx DMA\n"); lp->txdma_active = 1; } } @@ -589,20 +591,19 @@ static void smc911x_tx(struct net_device *dev) struct smc911x_local *lp = netdev_priv(dev); unsigned int tx_status; - DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, "%s: --> %s\n", - dev->name, __func__); + DBG(SMC_DEBUG_FUNC | SMC_DEBUG_TX, dev, "--> %s\n", + __func__); /* Collect the TX status */ while (((SMC_GET_TX_FIFO_INF(lp) & TX_FIFO_INF_TSUSED_) >> 16) != 0) { - DBG(SMC_DEBUG_TX, "%s: Tx stat FIFO used 0x%04x\n", - dev->name, - (SMC_GET_TX_FIFO_INF(lp) & TX_FIFO_INF_TSUSED_) >> 16); + DBG(SMC_DEBUG_TX, dev, "Tx stat FIFO used 0x%04x\n", + (SMC_GET_TX_FIFO_INF(lp) & TX_FIFO_INF_TSUSED_) >> 16); tx_status = SMC_GET_TX_STS_FIFO(lp); dev->stats.tx_packets++; dev->stats.tx_bytes+=tx_status>>16; - DBG(SMC_DEBUG_TX, "%s: Tx FIFO tag 0x%04x status 0x%04x\n", - dev->name, (tx_status & 0xffff0000) >> 16, - tx_status & 0x0000ffff); + DBG(SMC_DEBUG_TX, dev, "Tx FIFO tag 0x%04x status 0x%04x\n", + (tx_status & 0xffff0000) >> 16, + tx_status & 0x0000ffff); /* count Tx errors, but ignore lost carrier errors when in * full-duplex mode */ if ((tx_status & TX_STS_ES_) && !(lp->ctl_rfduplx && @@ -640,8 +641,8 @@ static int smc911x_phy_read(struct net_device *dev, int phyaddr, int phyreg) SMC_GET_MII(lp, phyreg, phyaddr, phydata); - DBG(SMC_DEBUG_MISC, "%s: phyaddr=0x%x, phyreg=0x%02x, phydata=0x%04x\n", - __func__, phyaddr, phyreg, phydata); + DBG(SMC_DEBUG_MISC, dev, "%s: phyaddr=0x%x, phyreg=0x%02x, phydata=0x%04x\n", + __func__, phyaddr, phyreg, phydata); return phydata; } @@ -654,8 +655,8 @@ static void smc911x_phy_write(struct net_device *dev, int phyaddr, int phyreg, { struct smc911x_local *lp = netdev_priv(dev); - DBG(SMC_DEBUG_MISC, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", - __func__, phyaddr, phyreg, phydata); + DBG(SMC_DEBUG_MISC, dev, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", + __func__, phyaddr, phyreg, phydata); SMC_SET_MII(lp, phyreg, phyaddr, phydata); } @@ -670,7 +671,7 @@ static void smc911x_phy_detect(struct net_device *dev) int phyaddr; unsigned int cfg, id1, id2; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); lp->phy_type = 0; @@ -731,8 +732,8 @@ static void smc911x_phy_detect(struct net_device *dev) lp->phy_type = id1 << 16 | id2; } - DBG(SMC_DEBUG_MISC, "%s: phy_id1=0x%x, phy_id2=0x%x phyaddr=0x%d\n", - dev->name, id1, id2, lp->mii.phy_id); + DBG(SMC_DEBUG_MISC, dev, "phy_id1=0x%x, phy_id2=0x%x phyaddr=0x%d\n", + id1, id2, lp->mii.phy_id); } /* @@ -745,7 +746,7 @@ static int smc911x_phy_fixed(struct net_device *dev) int phyaddr = lp->mii.phy_id; int bmcr; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); /* Enter Link Disable state */ SMC_GET_PHY_BMCR(lp, phyaddr, bmcr); @@ -792,7 +793,7 @@ static int smc911x_phy_reset(struct net_device *dev, int phy) unsigned long flags; unsigned int reg; - DBG(SMC_DEBUG_FUNC, "%s: --> %s()\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s()\n", __func__); spin_lock_irqsave(&lp->lock, flags); reg = SMC_GET_PMT_CTRL(lp); @@ -851,18 +852,18 @@ static void smc911x_phy_check_media(struct net_device *dev, int init) int phyaddr = lp->mii.phy_id; unsigned int bmcr, cr; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); if (mii_check_media(&lp->mii, netif_msg_link(lp), init)) { /* duplex state has changed */ SMC_GET_PHY_BMCR(lp, phyaddr, bmcr); SMC_GET_MAC_CR(lp, cr); if (lp->mii.full_duplex) { - DBG(SMC_DEBUG_MISC, "%s: Configuring for full-duplex mode\n", dev->name); + DBG(SMC_DEBUG_MISC, dev, "Configuring for full-duplex mode\n"); bmcr |= BMCR_FULLDPLX; cr |= MAC_CR_RCVOWN_; } else { - DBG(SMC_DEBUG_MISC, "%s: Configuring for half-duplex mode\n", dev->name); + DBG(SMC_DEBUG_MISC, dev, "Configuring for half-duplex mode\n"); bmcr &= ~BMCR_FULLDPLX; cr &= ~MAC_CR_RCVOWN_; } @@ -891,7 +892,7 @@ static void smc911x_phy_configure(struct work_struct *work) int status; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s()\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s()\n", __func__); /* * We should not be called if phy_type is zero. @@ -900,7 +901,7 @@ static void smc911x_phy_configure(struct work_struct *work) return; if (smc911x_phy_reset(dev, phyaddr)) { - printk("%s: PHY reset timed out\n", dev->name); + netdev_info(dev, "PHY reset timed out\n"); return; } spin_lock_irqsave(&lp->lock, flags); @@ -922,7 +923,7 @@ static void smc911x_phy_configure(struct work_struct *work) /* Copy our capabilities from MII_BMSR to MII_ADVERTISE */ SMC_GET_PHY_BMSR(lp, phyaddr, my_phy_caps); if (!(my_phy_caps & BMSR_ANEGCAPABLE)) { - printk(KERN_INFO "Auto negotiation NOT supported\n"); + netdev_info(dev, "Auto negotiation NOT supported\n"); smc911x_phy_fixed(dev); goto smc911x_phy_configure_exit; } @@ -960,8 +961,8 @@ static void smc911x_phy_configure(struct work_struct *work) udelay(10); SMC_GET_PHY_MII_ADV(lp, phyaddr, status); - DBG(SMC_DEBUG_MISC, "%s: phy caps=0x%04x\n", dev->name, my_phy_caps); - DBG(SMC_DEBUG_MISC, "%s: phy advertised caps=0x%04x\n", dev->name, my_ad_caps); + DBG(SMC_DEBUG_MISC, dev, "phy caps=0x%04x\n", my_phy_caps); + DBG(SMC_DEBUG_MISC, dev, "phy advertised caps=0x%04x\n", my_ad_caps); /* Restart auto-negotiation process in order to advertise my caps */ SMC_SET_PHY_BMCR(lp, phyaddr, BMCR_ANENABLE | BMCR_ANRESTART); @@ -984,7 +985,7 @@ static void smc911x_phy_interrupt(struct net_device *dev) int phyaddr = lp->mii.phy_id; int status; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); if (lp->phy_type == 0) return; @@ -992,10 +993,10 @@ static void smc911x_phy_interrupt(struct net_device *dev) smc911x_phy_check_media(dev, 0); /* read to clear status bits */ SMC_GET_PHY_INT_SRC(lp, phyaddr,status); - DBG(SMC_DEBUG_MISC, "%s: PHY interrupt status 0x%04x\n", - dev->name, status & 0xffff); - DBG(SMC_DEBUG_MISC, "%s: AFC_CFG 0x%08x\n", - dev->name, SMC_GET_AFC_CFG(lp)); + DBG(SMC_DEBUG_MISC, dev, "PHY interrupt status 0x%04x\n", + status & 0xffff); + DBG(SMC_DEBUG_MISC, dev, "AFC_CFG 0x%08x\n", + SMC_GET_AFC_CFG(lp)); } /*--- END PHY CONTROL AND CONFIGURATION-------------------------------------*/ @@ -1012,7 +1013,7 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) unsigned int rx_overrun=0, cr, pkts; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); spin_lock_irqsave(&lp->lock, flags); @@ -1033,8 +1034,8 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) do { status = SMC_GET_INT(lp); - DBG(SMC_DEBUG_MISC, "%s: INT 0x%08x MASK 0x%08x OUTSIDE MASK 0x%08x\n", - dev->name, status, mask, status & ~mask); + DBG(SMC_DEBUG_MISC, dev, "INT 0x%08x MASK 0x%08x OUTSIDE MASK 0x%08x\n", + status, mask, status & ~mask); status &= mask; if (!status) @@ -1066,7 +1067,7 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) SMC_GET_MAC_CR(lp, cr); cr &= ~MAC_CR_RXEN_; SMC_SET_MAC_CR(lp, cr); - DBG(SMC_DEBUG_RX, "%s: RX overrun\n", dev->name); + DBG(SMC_DEBUG_RX, dev, "RX overrun\n"); dev->stats.rx_errors++; dev->stats.rx_fifo_errors++; } @@ -1078,7 +1079,7 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) cr &= ~MAC_CR_RXEN_; SMC_SET_MAC_CR(lp, cr); rx_overrun=1; - DBG(SMC_DEBUG_RX, "%s: RX overrun\n", dev->name); + DBG(SMC_DEBUG_RX, dev, "RX overrun\n"); dev->stats.rx_errors++; dev->stats.rx_fifo_errors++; } @@ -1087,23 +1088,23 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) /* Handle receive condition */ if ((status & INT_STS_RSFL_) || rx_overrun) { unsigned int fifo; - DBG(SMC_DEBUG_RX, "%s: RX irq\n", dev->name); + DBG(SMC_DEBUG_RX, dev, "RX irq\n"); fifo = SMC_GET_RX_FIFO_INF(lp); pkts = (fifo & RX_FIFO_INF_RXSUSED_) >> 16; - DBG(SMC_DEBUG_RX, "%s: Rx FIFO pkts %d, bytes %d\n", - dev->name, pkts, fifo & 0xFFFF ); + DBG(SMC_DEBUG_RX, dev, "Rx FIFO pkts %d, bytes %d\n", + pkts, fifo & 0xFFFF); if (pkts != 0) { #ifdef SMC_USE_DMA unsigned int fifo; if (lp->rxdma_active){ - DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, - "%s: RX DMA active\n", dev->name); + DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, + "RX DMA active\n"); /* The DMA is already running so up the IRQ threshold */ fifo = SMC_GET_FIFO_INT(lp) & ~0xFF; fifo |= pkts & 0xFF; - DBG(SMC_DEBUG_RX, - "%s: Setting RX stat FIFO threshold to %d\n", - dev->name, fifo & 0xff); + DBG(SMC_DEBUG_RX, dev, + "Setting RX stat FIFO threshold to %d\n", + fifo & 0xff); SMC_SET_FIFO_INT(lp, fifo); } else #endif @@ -1113,7 +1114,7 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) } /* Handle transmit FIFO available */ if (status & INT_STS_TDFA_) { - DBG(SMC_DEBUG_TX, "%s: TX data FIFO space available irq\n", dev->name); + DBG(SMC_DEBUG_TX, dev, "TX data FIFO space available irq\n"); SMC_SET_FIFO_TDA(lp, 0xFF); lp->tx_throttle = 0; #ifdef SMC_USE_DMA @@ -1125,9 +1126,9 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) /* Handle transmit done condition */ #if 1 if (status & (INT_STS_TSFL_ | INT_STS_GPT_INT_)) { - DBG(SMC_DEBUG_TX | SMC_DEBUG_MISC, - "%s: Tx stat FIFO limit (%d) /GPT irq\n", - dev->name, (SMC_GET_FIFO_INT(lp) & 0x00ff0000) >> 16); + DBG(SMC_DEBUG_TX | SMC_DEBUG_MISC, dev, + "Tx stat FIFO limit (%d) /GPT irq\n", + (SMC_GET_FIFO_INT(lp) & 0x00ff0000) >> 16); smc911x_tx(dev); SMC_SET_GPT_CFG(lp, GPT_CFG_TIMER_EN_ | 10000); SMC_ACK_INT(lp, INT_STS_TSFL_); @@ -1135,23 +1136,20 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) } #else if (status & INT_STS_TSFL_) { - DBG(SMC_DEBUG_TX, "%s: TX status FIFO limit (%d) irq\n", dev->name, ); + DBG(SMC_DEBUG_TX, dev, "TX status FIFO limit (%d) irq\n", ?); smc911x_tx(dev); SMC_ACK_INT(lp, INT_STS_TSFL_); } if (status & INT_STS_GPT_INT_) { - DBG(SMC_DEBUG_RX, "%s: IRQ_CFG 0x%08x FIFO_INT 0x%08x RX_CFG 0x%08x\n", - dev->name, - SMC_GET_IRQ_CFG(lp), - SMC_GET_FIFO_INT(lp), - SMC_GET_RX_CFG(lp)); - DBG(SMC_DEBUG_RX, "%s: Rx Stat FIFO Used 0x%02x " - "Data FIFO Used 0x%04x Stat FIFO 0x%08x\n", - dev->name, - (SMC_GET_RX_FIFO_INF(lp) & 0x00ff0000) >> 16, - SMC_GET_RX_FIFO_INF(lp) & 0xffff, - SMC_GET_RX_STS_FIFO_PEEK(lp)); + DBG(SMC_DEBUG_RX, dev, "IRQ_CFG 0x%08x FIFO_INT 0x%08x RX_CFG 0x%08x\n", + SMC_GET_IRQ_CFG(lp), + SMC_GET_FIFO_INT(lp), + SMC_GET_RX_CFG(lp)); + DBG(SMC_DEBUG_RX, dev, "Rx Stat FIFO Used 0x%02x Data FIFO Used 0x%04x Stat FIFO 0x%08x\n", + (SMC_GET_RX_FIFO_INF(lp) & 0x00ff0000) >> 16, + SMC_GET_RX_FIFO_INF(lp) & 0xffff, + SMC_GET_RX_STS_FIFO_PEEK(lp)); SMC_SET_GPT_CFG(lp, GPT_CFG_TIMER_EN_ | 10000); SMC_ACK_INT(lp, INT_STS_GPT_INT_); } @@ -1159,7 +1157,7 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) /* Handle PHY interrupt condition */ if (status & INT_STS_PHY_INT_) { - DBG(SMC_DEBUG_MISC, "%s: PHY irq\n", dev->name); + DBG(SMC_DEBUG_MISC, dev, "PHY irq\n"); smc911x_phy_interrupt(dev); SMC_ACK_INT(lp, INT_STS_PHY_INT_); } @@ -1168,8 +1166,8 @@ static irqreturn_t smc911x_interrupt(int irq, void *dev_id) /* restore mask state */ SMC_SET_INT_EN(lp, mask); - DBG(SMC_DEBUG_MISC, "%s: Interrupt done (%d loops)\n", - dev->name, 8-timeout); + DBG(SMC_DEBUG_MISC, dev, "Interrupt done (%d loops)\n", + 8-timeout); spin_unlock_irqrestore(&lp->lock, flags); @@ -1185,9 +1183,9 @@ smc911x_tx_dma_irq(int dma, void *data) struct sk_buff *skb = lp->current_tx_skb; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); - DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, "%s: TX DMA irq handler\n", dev->name); + DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n"); /* Clear the DMA interrupt sources */ SMC_DMA_ACK_IRQ(dev, dma); BUG_ON(skb == NULL); @@ -1198,8 +1196,8 @@ smc911x_tx_dma_irq(int dma, void *data) if (lp->pending_tx_skb != NULL) smc911x_hardware_send_pkt(dev); else { - DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, - "%s: No pending Tx packets. DMA disabled\n", dev->name); + DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, + "No pending Tx packets. DMA disabled\n"); spin_lock_irqsave(&lp->lock, flags); lp->txdma_active = 0; if (!lp->tx_throttle) { @@ -1208,8 +1206,8 @@ smc911x_tx_dma_irq(int dma, void *data) spin_unlock_irqrestore(&lp->lock, flags); } - DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, - "%s: TX DMA irq completed\n", dev->name); + DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, + "TX DMA irq completed\n"); } static void smc911x_rx_dma_irq(int dma, void *data) @@ -1221,8 +1219,8 @@ smc911x_rx_dma_irq(int dma, void *data) unsigned long flags; unsigned int pkts; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); - DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, "%s: RX DMA irq handler\n", dev->name); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); + DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n"); /* Clear the DMA interrupt sources */ SMC_DMA_ACK_IRQ(dev, dma); dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE); @@ -1242,9 +1240,9 @@ smc911x_rx_dma_irq(int dma, void *data) lp->rxdma_active = 0; } spin_unlock_irqrestore(&lp->lock, flags); - DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, - "%s: RX DMA irq completed. DMA RX FIFO PKTS %d\n", - dev->name, pkts); + DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, + "RX DMA irq completed. DMA RX FIFO PKTS %d\n", + pkts); } #endif /* SMC_USE_DMA */ @@ -1268,14 +1266,14 @@ static void smc911x_timeout(struct net_device *dev) int status, mask; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); spin_lock_irqsave(&lp->lock, flags); status = SMC_GET_INT(lp); mask = SMC_GET_INT_EN(lp); spin_unlock_irqrestore(&lp->lock, flags); - DBG(SMC_DEBUG_MISC, "%s: INT 0x%02x MASK 0x%02x\n", - dev->name, status, mask); + DBG(SMC_DEBUG_MISC, dev, "INT 0x%02x MASK 0x%02x\n", + status, mask); /* Dump the current TX FIFO contents and restart */ mask = SMC_GET_TX_CFG(lp); @@ -1306,7 +1304,7 @@ static void smc911x_set_multicast_list(struct net_device *dev) unsigned int mcr, update_multicast = 0; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); spin_lock_irqsave(&lp->lock, flags); SMC_GET_MAC_CR(lp, mcr); @@ -1314,7 +1312,7 @@ static void smc911x_set_multicast_list(struct net_device *dev) if (dev->flags & IFF_PROMISC) { - DBG(SMC_DEBUG_MISC, "%s: RCR_PRMS\n", dev->name); + DBG(SMC_DEBUG_MISC, dev, "RCR_PRMS\n"); mcr |= MAC_CR_PRMS_; } /* @@ -1323,7 +1321,7 @@ static void smc911x_set_multicast_list(struct net_device *dev) * checked before the table is */ else if (dev->flags & IFF_ALLMULTI || netdev_mc_count(dev) > 16) { - DBG(SMC_DEBUG_MISC, "%s: RCR_ALMUL\n", dev->name); + DBG(SMC_DEBUG_MISC, dev, "RCR_ALMUL\n"); mcr |= MAC_CR_MCPAS_; } @@ -1363,8 +1361,7 @@ static void smc911x_set_multicast_list(struct net_device *dev) /* now, the table can be loaded into the chipset */ update_multicast = 1; } else { - DBG(SMC_DEBUG_MISC, "%s: ~(MAC_CR_PRMS_|MAC_CR_MCPAS_)\n", - dev->name); + DBG(SMC_DEBUG_MISC, dev, "~(MAC_CR_PRMS_|MAC_CR_MCPAS_)\n"); mcr &= ~(MAC_CR_PRMS_ | MAC_CR_MCPAS_); /* @@ -1378,9 +1375,9 @@ static void smc911x_set_multicast_list(struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); SMC_SET_MAC_CR(lp, mcr); if (update_multicast) { - DBG(SMC_DEBUG_MISC, - "%s: update mcast hash table 0x%08x 0x%08x\n", - dev->name, multicast_table[0], multicast_table[1]); + DBG(SMC_DEBUG_MISC, dev, + "update mcast hash table 0x%08x 0x%08x\n", + multicast_table[0], multicast_table[1]); SMC_SET_HASHL(lp, multicast_table[0]); SMC_SET_HASHH(lp, multicast_table[1]); } @@ -1398,7 +1395,7 @@ smc911x_open(struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); /* reset the hardware */ smc911x_reset(dev); @@ -1425,7 +1422,7 @@ static int smc911x_close(struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); netif_stop_queue(dev); netif_carrier_off(dev); @@ -1459,7 +1456,7 @@ smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) int ret, status; unsigned long flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); cmd->maxtxpkt = 1; cmd->maxrxpkt = 1; @@ -1597,16 +1594,16 @@ static int smc911x_ethtool_wait_eeprom_ready(struct net_device *dev) e2p_cmd = SMC_GET_E2P_CMD(lp); for(timeout=10;(e2p_cmd & E2P_CMD_EPC_BUSY_) && timeout; timeout--) { if (e2p_cmd & E2P_CMD_EPC_TIMEOUT_) { - PRINTK("%s: %s timeout waiting for EEPROM to respond\n", - dev->name, __func__); + PRINTK(dev, "%s timeout waiting for EEPROM to respond\n", + __func__); return -EFAULT; } mdelay(1); e2p_cmd = SMC_GET_E2P_CMD(lp); } if (timeout == 0) { - PRINTK("%s: %s timeout waiting for EEPROM CMD not busy\n", - dev->name, __func__); + PRINTK(dev, "%s timeout waiting for EEPROM CMD not busy\n", + __func__); return -ETIMEDOUT; } return 0; @@ -1719,7 +1716,7 @@ static int smc911x_findirq(struct net_device *dev) int timeout = 20; unsigned long cookie; - DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); cookie = probe_irq_on(); @@ -1799,13 +1796,14 @@ static int smc911x_probe(struct net_device *dev) const char *version_string; unsigned long irq_flags; - DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); + DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); /* First, see if the endian word is recognized */ val = SMC_GET_BYTE_TEST(lp); - DBG(SMC_DEBUG_MISC, "%s: endian probe returned 0x%04x\n", CARDNAME, val); + DBG(SMC_DEBUG_MISC, dev, "%s: endian probe returned 0x%04x\n", + CARDNAME, val); if (val != 0x87654321) { - printk(KERN_ERR "Invalid chip endian 0x%08x\n",val); + netdev_err(dev, "Invalid chip endian 0x%08x\n", val); retval = -ENODEV; goto err_out; } @@ -1816,26 +1814,29 @@ static int smc911x_probe(struct net_device *dev) * as future revisions could be added. */ chip_id = SMC_GET_PN(lp); - DBG(SMC_DEBUG_MISC, "%s: id probe returned 0x%04x\n", CARDNAME, chip_id); + DBG(SMC_DEBUG_MISC, dev, "%s: id probe returned 0x%04x\n", + CARDNAME, chip_id); for(i=0;chip_ids[i].id != 0; i++) { if (chip_ids[i].id == chip_id) break; } if (!chip_ids[i].id) { - printk(KERN_ERR "Unknown chip ID %04x\n", chip_id); + netdev_err(dev, "Unknown chip ID %04x\n", chip_id); retval = -ENODEV; goto err_out; } version_string = chip_ids[i].name; revision = SMC_GET_REV(lp); - DBG(SMC_DEBUG_MISC, "%s: revision = 0x%04x\n", CARDNAME, revision); + DBG(SMC_DEBUG_MISC, dev, "%s: revision = 0x%04x\n", CARDNAME, revision); /* At this point I'll assume that the chip is an SMC911x. */ - DBG(SMC_DEBUG_MISC, "%s: Found a %s\n", CARDNAME, chip_ids[i].name); + DBG(SMC_DEBUG_MISC, dev, "%s: Found a %s\n", + CARDNAME, chip_ids[i].name); /* Validate the TX FIFO size requested */ if ((tx_fifo_kb < 2) || (tx_fifo_kb > 14)) { - printk(KERN_ERR "Invalid TX FIFO size requested %d\n", tx_fifo_kb); + netdev_err(dev, "Invalid TX FIFO size requested %d\n", + tx_fifo_kb); retval = -EINVAL; goto err_out; } @@ -1887,14 +1888,13 @@ static int smc911x_probe(struct net_device *dev) case 14:/* 1920 Rx Data Fifo Size */ lp->afc_cfg=0x0006032F;break; default: - PRINTK("%s: ERROR -- no AFC_CFG setting found", - dev->name); + PRINTK(dev, "ERROR -- no AFC_CFG setting found"); break; } - DBG(SMC_DEBUG_MISC | SMC_DEBUG_TX | SMC_DEBUG_RX, - "%s: tx_fifo %d rx_fifo %d afc_cfg 0x%08x\n", CARDNAME, - lp->tx_fifo_size, lp->rx_fifo_size, lp->afc_cfg); + DBG(SMC_DEBUG_MISC | SMC_DEBUG_TX | SMC_DEBUG_RX, dev, + "%s: tx_fifo %d rx_fifo %d afc_cfg 0x%08x\n", CARDNAME, + lp->tx_fifo_size, lp->rx_fifo_size, lp->afc_cfg); spin_lock_init(&lp->lock); @@ -1924,8 +1924,7 @@ static int smc911x_probe(struct net_device *dev) } } if (dev->irq == 0) { - printk("%s: Couldn't autodetect your IRQ. Use irq=xx.\n", - dev->name); + netdev_warn(dev, "Couldn't autodetect your IRQ. Use irq=xx.\n"); retval = -ENODEV; goto err_out; } @@ -1980,33 +1979,32 @@ static int smc911x_probe(struct net_device *dev) retval = register_netdev(dev); if (retval == 0) { /* now, print out the card info, in a short format.. */ - printk("%s: %s (rev %d) at %#lx IRQ %d", - dev->name, version_string, lp->revision, - dev->base_addr, dev->irq); + netdev_info(dev, "%s (rev %d) at %#lx IRQ %d", + version_string, lp->revision, + dev->base_addr, dev->irq); #ifdef SMC_USE_DMA if (lp->rxdma != -1) - printk(" RXDMA %d ", lp->rxdma); + pr_cont(" RXDMA %d", lp->rxdma); if (lp->txdma != -1) - printk("TXDMA %d", lp->txdma); + pr_cont(" TXDMA %d", lp->txdma); #endif - printk("\n"); + pr_cont("\n"); if (!is_valid_ether_addr(dev->dev_addr)) { - printk("%s: Invalid ethernet MAC address. Please " - "set using ifconfig\n", dev->name); + netdev_warn(dev, "Invalid ethernet MAC address. Please set using ifconfig\n"); } else { /* Print the Ethernet address */ - printk("%s: Ethernet addr: %pM\n", - dev->name, dev->dev_addr); + netdev_info(dev, "Ethernet addr: %pM\n", + dev->dev_addr); } if (lp->phy_type == 0) { - PRINTK("%s: No PHY found\n", dev->name); + PRINTK(dev, "No PHY found\n"); } else if ((lp->phy_type & ~0xff) == LAN911X_INTERNAL_PHY_ID) { - PRINTK("%s: LAN911x Internal PHY\n", dev->name); + PRINTK(dev, "LAN911x Internal PHY\n"); } else { - PRINTK("%s: External PHY 0x%08x\n", dev->name, lp->phy_type); + PRINTK(dev, "External PHY 0x%08x\n", lp->phy_type); } } @@ -2025,7 +2023,7 @@ err_out: } /* - * smc911x_init(void) + * smc911x_drv_probe(void) * * Output: * 0 --> there is a device @@ -2039,6 +2037,7 @@ static int smc911x_drv_probe(struct platform_device *pdev) void __iomem *addr; int ret; + /* ndev is not valid yet, so avoid passing it in. */ DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -2093,7 +2092,7 @@ release_both: release_1: release_mem_region(res->start, SMC911X_IO_EXTENT); out: - printk("%s: not found (%d).\n", CARDNAME, ret); + pr_info("%s: not found (%d).\n", CARDNAME, ret); } #ifdef SMC_USE_DMA else { @@ -2111,7 +2110,7 @@ static int smc911x_drv_remove(struct platform_device *pdev) struct smc911x_local *lp = netdev_priv(ndev); struct resource *res; - DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); + DBG(SMC_DEBUG_FUNC, ndev, "--> %s\n", __func__); unregister_netdev(ndev); @@ -2140,7 +2139,7 @@ static int smc911x_drv_suspend(struct platform_device *dev, pm_message_t state) struct net_device *ndev = platform_get_drvdata(dev); struct smc911x_local *lp = netdev_priv(ndev); - DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); + DBG(SMC_DEBUG_FUNC, ndev, "--> %s\n", __func__); if (ndev) { if (netif_running(ndev)) { netif_device_detach(ndev); @@ -2158,7 +2157,7 @@ static int smc911x_drv_resume(struct platform_device *dev) { struct net_device *ndev = platform_get_drvdata(dev); - DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); + DBG(SMC_DEBUG_FUNC, ndev, "--> %s\n", __func__); if (ndev) { struct smc911x_local *lp = netdev_priv(ndev); diff --git a/drivers/net/ethernet/smsc/smc911x.h b/drivers/net/ethernet/smsc/smc911x.h index d51261ba4642..9965da39281b 100644 --- a/drivers/net/ethernet/smsc/smc911x.h +++ b/drivers/net/ethernet/smsc/smc911x.h @@ -227,7 +227,7 @@ static inline void SMC_outsl(struct smc911x_local *lp, int reg, #define SMC_DMA_ACK_IRQ(dev, dma) \ { \ if (DCSR(dma) & DCSR_BUSERR) { \ - printk("%s: DMA %d bus error!\n", dev->name, dma); \ + netdev_err(dev, "DMA %d bus error!\n", dma); \ } \ DCSR(dma) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; \ } diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index e85c2e7e8246..51aa9cf3a3e7 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -55,7 +55,7 @@ ----------------------------------------------------------------------------*/ static const char version[] = - "smc9194.c:v0.14 12/15/00 by Erik Stahlman (erik@vt.edu)\n"; + "smc9194.c:v0.14 12/15/00 by Erik Stahlman (erik@vt.edu)"; #include <linux/module.h> #include <linux/kernel.h> @@ -612,7 +612,7 @@ static void smc_hardware_send_packet( struct net_device * dev ) packet_no = inb( ioaddr + PNR_ARR + 1 ); if ( packet_no & 0x80 ) { /* or isn't there? BAD CHIP! */ - printk(KERN_DEBUG CARDNAME": Memory allocation failed.\n"); + netdev_dbg(dev, CARDNAME": Memory allocation failed.\n"); dev_kfree_skb_any(skb); lp->saved_skb = NULL; netif_wake_queue(dev); @@ -625,7 +625,7 @@ static void smc_hardware_send_packet( struct net_device * dev ) /* point to the beginning of the packet */ outw( PTR_AUTOINC , ioaddr + POINTER ); - PRINTK3((CARDNAME": Trying to xmit packet of length %x\n", length )); + PRINTK3((CARDNAME": Trying to xmit packet of length %x\n", length)); #if SMC_DEBUG > 2 print_packet( buf, length ); #endif @@ -865,7 +865,6 @@ static const struct net_device_ops smc_netdev_ops = { static int __init smc_probe(struct net_device *dev, int ioaddr) { int i, memory, retval; - static unsigned version_printed; unsigned int bank; const char *version_string; @@ -937,8 +936,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr) It might be prudent to check a listing of MAC addresses against the hardware address, or do some other tests. */ - if (version_printed++ == 0) - printk("%s", version); + pr_info_once("%s\n", version); /* fill in some of the fields */ dev->base_addr = ioaddr; @@ -1027,21 +1025,21 @@ static int __init smc_probe(struct net_device *dev, int ioaddr) /* now, print out the card info, in a short format.. */ - printk("%s: %s(r:%d) at %#3x IRQ:%d INTF:%s MEM:%db ", dev->name, - version_string, revision_register & 0xF, ioaddr, dev->irq, - if_string, memory ); + netdev_info(dev, "%s(r:%d) at %#3x IRQ:%d INTF:%s MEM:%db ", + version_string, revision_register & 0xF, ioaddr, dev->irq, + if_string, memory); /* . Print the Ethernet address */ - printk("ADDR: %pM\n", dev->dev_addr); + netdev_info(dev, "ADDR: %pM\n", dev->dev_addr); /* Grab the IRQ */ - retval = request_irq(dev->irq, smc_interrupt, 0, DRV_NAME, dev); - if (retval) { - printk("%s: unable to get IRQ %d (irqval=%d).\n", DRV_NAME, - dev->irq, retval); - goto err_out; - } + retval = request_irq(dev->irq, smc_interrupt, 0, DRV_NAME, dev); + if (retval) { + netdev_warn(dev, "%s: unable to get IRQ %d (irqval=%d).\n", + DRV_NAME, dev->irq, retval); + goto err_out; + } dev->netdev_ops = &smc_netdev_ops; dev->watchdog_timeo = HZ/20; @@ -1061,30 +1059,32 @@ static void print_packet( byte * buf, int length ) int remainder; int lines; - printk("Packet of length %d\n", length); + pr_dbg("Packet of length %d\n", length); lines = length / 16; remainder = length % 16; for ( i = 0; i < lines ; i ++ ) { int cur; + printk(KERN_DEBUG); for ( cur = 0; cur < 8; cur ++ ) { byte a, b; a = *(buf ++ ); b = *(buf ++ ); - printk("%02x%02x ", a, b ); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); } + printk(KERN_DEBUG); for ( i = 0; i < remainder/2 ; i++ ) { byte a, b; a = *(buf ++ ); b = *(buf ++ ); - printk("%02x%02x ", a, b ); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); #endif } #endif @@ -1151,9 +1151,8 @@ static void smc_timeout(struct net_device *dev) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ - printk(KERN_WARNING CARDNAME": transmit timed out, %s?\n", - tx_done(dev) ? "IRQ conflict" : - "network cable problem"); + netdev_warn(dev, CARDNAME": transmit timed out, %s?\n", + tx_done(dev) ? "IRQ conflict" : "network cable problem"); /* "kick" the adaptor */ smc_reset( dev->base_addr ); smc_enable( dev->base_addr ); @@ -1323,8 +1322,7 @@ static void smc_tx( struct net_device * dev ) dev->stats.tx_errors++; if ( tx_status & TS_LOSTCAR ) dev->stats.tx_carrier_errors++; if ( tx_status & TS_LATCOL ) { - printk(KERN_DEBUG CARDNAME - ": Late collision occurred on last xmit.\n"); + netdev_dbg(dev, CARDNAME": Late collision occurred on last xmit.\n"); dev->stats.tx_window_errors++; } #if 0 @@ -1332,7 +1330,7 @@ static void smc_tx( struct net_device * dev ) #endif if ( tx_status & TS_SUCCESS ) { - printk(CARDNAME": Successful packet caused interrupt\n"); + netdev_info(dev, CARDNAME": Successful packet caused interrupt\n"); } /* re-enable transmit */ SMC_SELECT_BANK( 0 ); diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 656d2e2ebfc9..8ef70d9c20c1 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -740,7 +740,7 @@ static int smc91c92_resume(struct pcmcia_device *link) (smc->cardid == PRODID_PSION_NET100))) { i = osi_load_firmware(link); if (i) { - pr_err("smc91c92_cs: Failed to load firmware\n"); + netdev_err(dev, "Failed to load firmware\n"); return i; } } @@ -793,7 +793,7 @@ static int check_sig(struct pcmcia_device *link) } if (width) { - pr_info("using 8-bit IO window\n"); + netdev_info(dev, "using 8-bit IO window\n"); smc91c92_suspend(link); pcmcia_fixup_iowidth(link); @@ -1036,7 +1036,7 @@ static void smc_dump(struct net_device *dev) save = inw(ioaddr + BANK_SELECT); for (w = 0; w < 4; w++) { SMC_SELECT_BANK(w); - netdev_printk(KERN_DEBUG, dev, "bank %d: ", w); + netdev_dbg(dev, "bank %d: ", w); for (i = 0; i < 14; i += 2) pr_cont(" %04x", inw(ioaddr + i)); pr_cont("\n"); @@ -1213,8 +1213,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, if (smc->saved_skb) { /* THIS SHOULD NEVER HAPPEN. */ dev->stats.tx_aborted_errors++; - netdev_printk(KERN_DEBUG, dev, - "Internal error -- sent packet while busy\n"); + netdev_dbg(dev, "Internal error -- sent packet while busy\n"); return NETDEV_TX_BUSY; } smc->saved_skb = skb; @@ -1254,7 +1253,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, } /* Otherwise defer until the Tx-space-allocated interrupt. */ - pr_debug("%s: memory allocation deferred.\n", dev->name); + netdev_dbg(dev, "memory allocation deferred.\n"); outw((IM_ALLOC_INT << 8) | (ir & 0xff00), ioaddr + INTERRUPT); spin_unlock_irqrestore(&smc->lock, flags); @@ -1317,8 +1316,8 @@ static void smc_eph_irq(struct net_device *dev) SMC_SELECT_BANK(0); ephs = inw(ioaddr + EPH); - pr_debug("%s: Ethernet protocol handler interrupt, status" - " %4.4x.\n", dev->name, ephs); + netdev_dbg(dev, "Ethernet protocol handler interrupt, status %4.4x.\n", + ephs); /* Could be a counter roll-over warning: update stats. */ card_stats = inw(ioaddr + COUNTER); /* single collisions */ @@ -1357,8 +1356,8 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) ioaddr = dev->base_addr; - pr_debug("%s: SMC91c92 interrupt %d at %#x.\n", dev->name, - irq, ioaddr); + netdev_dbg(dev, "SMC91c92 interrupt %d at %#x.\n", + irq, ioaddr); spin_lock(&smc->lock); smc->watchdog = 0; @@ -1366,8 +1365,8 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) if ((saved_bank & 0xff00) != 0x3300) { /* The device does not exist -- the card could be off-line, or maybe it has been ejected. */ - pr_debug("%s: SMC91c92 interrupt %d for non-existent" - "/ejected device.\n", dev->name, irq); + netdev_dbg(dev, "SMC91c92 interrupt %d for non-existent/ejected device.\n", + irq); handled = 0; goto irq_done; } @@ -1380,8 +1379,8 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) do { /* read the status flag, and mask it */ status = inw(ioaddr + INTERRUPT) & 0xff; - pr_debug("%s: Status is %#2.2x (mask %#2.2x).\n", dev->name, - status, mask); + netdev_dbg(dev, "Status is %#2.2x (mask %#2.2x).\n", + status, mask); if ((status & mask) == 0) { if (bogus_cnt == INTR_WORK) handled = 0; @@ -1425,15 +1424,15 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) smc_eph_irq(dev); } while (--bogus_cnt); - pr_debug(" Restoring saved registers mask %2.2x bank %4.4x" - " pointer %4.4x.\n", mask, saved_bank, saved_pointer); + netdev_dbg(dev, " Restoring saved registers mask %2.2x bank %4.4x pointer %4.4x.\n", + mask, saved_bank, saved_pointer); /* restore state register */ outw((mask<<8), ioaddr + INTERRUPT); outw(saved_pointer, ioaddr + POINTER); SMC_SELECT_BANK(saved_bank); - pr_debug("%s: Exiting interrupt IRQ%d.\n", dev->name, irq); + netdev_dbg(dev, "Exiting interrupt IRQ%d.\n", irq); irq_done: @@ -1491,10 +1490,10 @@ static void smc_rx(struct net_device *dev) rx_status = inw(ioaddr + DATA_1); packet_length = inw(ioaddr + DATA_1) & 0x07ff; - pr_debug("%s: Receive status %4.4x length %d.\n", - dev->name, rx_status, packet_length); + netdev_dbg(dev, "Receive status %4.4x length %d.\n", + rx_status, packet_length); - if (!(rx_status & RS_ERRORS)) { + if (!(rx_status & RS_ERRORS)) { /* do stuff to make a new packet */ struct sk_buff *skb; @@ -1502,7 +1501,7 @@ static void smc_rx(struct net_device *dev) skb = netdev_alloc_skb(dev, packet_length+2); if (skb == NULL) { - pr_debug("%s: Low memory, packet dropped.\n", dev->name); + netdev_dbg(dev, "Low memory, packet dropped.\n"); dev->stats.rx_dropped++; outw(MC_RELEASE, ioaddr + MMU_CMD); return; @@ -1643,7 +1642,7 @@ static void smc_reset(struct net_device *dev) struct smc_private *smc = netdev_priv(dev); int i; - pr_debug("%s: smc91c92 reset called.\n", dev->name); + netdev_dbg(dev, "smc91c92 reset called.\n"); /* The first interaction must be a write to bring the chip out of sleep mode. */ diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 73be7f3982e6..0c9b5d94154f 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -58,7 +58,7 @@ * 22/09/04 Nicolas Pitre big update (see commit log for details) */ static const char version[] = - "smc91x.c: v1.1, sep 22 2004 by Nicolas Pitre <nico@fluxnic.net>\n"; + "smc91x.c: v1.1, sep 22 2004 by Nicolas Pitre <nico@fluxnic.net>"; /* Debugging level */ #ifndef SMC_DEBUG @@ -149,16 +149,16 @@ MODULE_ALIAS("platform:smc91x"); #define MII_DELAY 1 #if SMC_DEBUG > 0 -#define DBG(n, args...) \ +#define DBG(n, dev, args...) \ do { \ if (SMC_DEBUG >= (n)) \ - printk(args); \ + netdev_dbg(dev, args); \ } while (0) -#define PRINTK(args...) printk(args) +#define PRINTK(dev, args...) netdev_info(dev, args) #else -#define DBG(n, args...) do { } while(0) -#define PRINTK(args...) printk(KERN_DEBUG args) +#define DBG(n, dev, args...) do { } while (0) +#define PRINTK(dev, args...) netdev_dbg(dev, args) #endif #if SMC_DEBUG > 3 @@ -173,24 +173,26 @@ static void PRINT_PKT(u_char *buf, int length) for (i = 0; i < lines ; i ++) { int cur; + printk(KERN_DEBUG); for (cur = 0; cur < 8; cur++) { u_char a, b; a = *buf++; b = *buf++; - printk("%02x%02x ", a, b); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); } + printk(KERN_DEBUG); for (i = 0; i < remainder/2 ; i++) { u_char a, b; a = *buf++; b = *buf++; - printk("%02x%02x ", a, b); + pr_cont("%02x%02x ", a, b); } - printk("\n"); + pr_cont("\n"); } #else -#define PRINT_PKT(x...) do { } while(0) +#define PRINT_PKT(x...) do { } while (0) #endif @@ -226,8 +228,8 @@ static void PRINT_PKT(u_char *buf, int length) unsigned long timeout = jiffies + 2; \ while (SMC_GET_MMU_CMD(lp) & MC_BUSY) { \ if (time_after(jiffies, timeout)) { \ - printk("%s: timeout %s line %d\n", \ - dev->name, __FILE__, __LINE__); \ + netdev_dbg(dev, "timeout %s line %d\n", \ + __FILE__, __LINE__); \ break; \ } \ cpu_relax(); \ @@ -246,7 +248,7 @@ static void smc_reset(struct net_device *dev) unsigned int ctl, cfg; struct sk_buff *pending_skb; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); /* Disable all interrupts, block TX tasklet */ spin_lock_irq(&lp->lock); @@ -339,7 +341,7 @@ static void smc_enable(struct net_device *dev) void __iomem *ioaddr = lp->base; int mask; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); /* see the header file for options in TCR/RCR DEFAULT */ SMC_SELECT_BANK(lp, 0); @@ -373,7 +375,7 @@ static void smc_shutdown(struct net_device *dev) void __iomem *ioaddr = lp->base; struct sk_buff *pending_skb; - DBG(2, "%s: %s\n", CARDNAME, __func__); + DBG(2, dev, "%s: %s\n", CARDNAME, __func__); /* no more interrupts for me */ spin_lock_irq(&lp->lock); @@ -406,11 +408,11 @@ static inline void smc_rcv(struct net_device *dev) void __iomem *ioaddr = lp->base; unsigned int packet_number, status, packet_len; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); packet_number = SMC_GET_RXFIFO(lp); if (unlikely(packet_number & RXFIFO_REMPTY)) { - PRINTK("%s: smc_rcv with nothing on FIFO.\n", dev->name); + PRINTK(dev, "smc_rcv with nothing on FIFO.\n"); return; } @@ -420,9 +422,8 @@ static inline void smc_rcv(struct net_device *dev) /* First two words are status and packet length */ SMC_GET_PKT_HDR(lp, status, packet_len); packet_len &= 0x07ff; /* mask off top bits */ - DBG(2, "%s: RX PNR 0x%x STATUS 0x%04x LENGTH 0x%04x (%d)\n", - dev->name, packet_number, status, - packet_len, packet_len); + DBG(2, dev, "RX PNR 0x%x STATUS 0x%04x LENGTH 0x%04x (%d)\n", + packet_number, status, packet_len, packet_len); back: if (unlikely(packet_len < 6 || status & RS_ERRORS)) { @@ -433,8 +434,8 @@ static inline void smc_rcv(struct net_device *dev) } if (packet_len < 6) { /* bloody hardware */ - printk(KERN_ERR "%s: fubar (rxlen %u status %x\n", - dev->name, packet_len, status); + netdev_err(dev, "fubar (rxlen %u status %x\n", + packet_len, status); status |= RS_TOOSHORT; } SMC_WAIT_MMU_BUSY(lp); @@ -551,7 +552,7 @@ static void smc_hardware_send_pkt(unsigned long data) unsigned char *buf; unsigned long flags; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); if (!smc_special_trylock(&lp->lock, flags)) { netif_stop_queue(dev); @@ -568,7 +569,7 @@ static void smc_hardware_send_pkt(unsigned long data) packet_no = SMC_GET_AR(lp); if (unlikely(packet_no & AR_FAILED)) { - printk("%s: Memory allocation failed.\n", dev->name); + netdev_err(dev, "Memory allocation failed.\n"); dev->stats.tx_errors++; dev->stats.tx_fifo_errors++; smc_special_unlock(&lp->lock, flags); @@ -581,8 +582,8 @@ static void smc_hardware_send_pkt(unsigned long data) buf = skb->data; len = skb->len; - DBG(2, "%s: TX PNR 0x%x LENGTH 0x%04x (%d) BUF 0x%p\n", - dev->name, packet_no, len, len, buf); + DBG(2, dev, "TX PNR 0x%x LENGTH 0x%04x (%d) BUF 0x%p\n", + packet_no, len, len, buf); PRINT_PKT(buf, len); /* @@ -637,7 +638,7 @@ static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int numPages, poll_count, status; unsigned long flags; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); BUG_ON(lp->pending_tx_skb != NULL); @@ -654,7 +655,7 @@ static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ numPages = ((skb->len & ~1) + (6 - 1)) >> 8; if (unlikely(numPages > 7)) { - printk("%s: Far too big packet error.\n", dev->name); + netdev_warn(dev, "Far too big packet error.\n"); dev->stats.tx_errors++; dev->stats.tx_dropped++; dev_kfree_skb(skb); @@ -685,7 +686,7 @@ static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!poll_count) { /* oh well, wait until the chip finds memory later */ netif_stop_queue(dev); - DBG(2, "%s: TX memory allocation deferred.\n", dev->name); + DBG(2, dev, "TX memory allocation deferred.\n"); SMC_ENABLE_INT(lp, IM_ALLOC_INT); } else { /* @@ -709,12 +710,12 @@ static void smc_tx(struct net_device *dev) void __iomem *ioaddr = lp->base; unsigned int saved_packet, packet_no, tx_status, pkt_len; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); /* If the TX FIFO is empty then nothing to do */ packet_no = SMC_GET_TXFIFO(lp); if (unlikely(packet_no & TXFIFO_TEMPTY)) { - PRINTK("%s: smc_tx with nothing on FIFO.\n", dev->name); + PRINTK(dev, "smc_tx with nothing on FIFO.\n"); return; } @@ -725,8 +726,8 @@ static void smc_tx(struct net_device *dev) /* read the first word (status word) from this packet */ SMC_SET_PTR(lp, PTR_AUTOINC | PTR_READ); SMC_GET_PKT_HDR(lp, tx_status, pkt_len); - DBG(2, "%s: TX STATUS 0x%04x PNR 0x%02x\n", - dev->name, tx_status, packet_no); + DBG(2, dev, "TX STATUS 0x%04x PNR 0x%02x\n", + tx_status, packet_no); if (!(tx_status & ES_TX_SUC)) dev->stats.tx_errors++; @@ -735,14 +736,12 @@ static void smc_tx(struct net_device *dev) dev->stats.tx_carrier_errors++; if (tx_status & (ES_LATCOL | ES_16COL)) { - PRINTK("%s: %s occurred on last xmit\n", dev->name, + PRINTK(dev, "%s occurred on last xmit\n", (tx_status & ES_LATCOL) ? "late collision" : "too many collisions"); dev->stats.tx_window_errors++; if (!(dev->stats.tx_window_errors & 63) && net_ratelimit()) { - printk(KERN_INFO "%s: unexpectedly large number of " - "bad collisions. Please check duplex " - "setting.\n", dev->name); + netdev_info(dev, "unexpectedly large number of bad collisions. Please check duplex setting.\n"); } } @@ -830,8 +829,8 @@ static int smc_phy_read(struct net_device *dev, int phyaddr, int phyreg) /* Return to idle state */ SMC_SET_MII(lp, SMC_GET_MII(lp) & ~(MII_MCLK|MII_MDOE|MII_MDO)); - DBG(3, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", - __func__, phyaddr, phyreg, phydata); + DBG(3, dev, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", + __func__, phyaddr, phyreg, phydata); SMC_SELECT_BANK(lp, 2); return phydata; @@ -857,8 +856,8 @@ static void smc_phy_write(struct net_device *dev, int phyaddr, int phyreg, /* Return to idle state */ SMC_SET_MII(lp, SMC_GET_MII(lp) & ~(MII_MCLK|MII_MDOE|MII_MDO)); - DBG(3, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", - __func__, phyaddr, phyreg, phydata); + DBG(3, dev, "%s: phyaddr=0x%x, phyreg=0x%x, phydata=0x%x\n", + __func__, phyaddr, phyreg, phydata); SMC_SELECT_BANK(lp, 2); } @@ -871,7 +870,7 @@ static void smc_phy_detect(struct net_device *dev) struct smc_local *lp = netdev_priv(dev); int phyaddr; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); lp->phy_type = 0; @@ -886,8 +885,8 @@ static void smc_phy_detect(struct net_device *dev) id1 = smc_phy_read(dev, phyaddr & 31, MII_PHYSID1); id2 = smc_phy_read(dev, phyaddr & 31, MII_PHYSID2); - DBG(3, "%s: phy_id1=0x%x, phy_id2=0x%x\n", - dev->name, id1, id2); + DBG(3, dev, "phy_id1=0x%x, phy_id2=0x%x\n", + id1, id2); /* Make sure it is a valid identifier */ if (id1 != 0x0000 && id1 != 0xffff && id1 != 0x8000 && @@ -910,7 +909,7 @@ static int smc_phy_fixed(struct net_device *dev) int phyaddr = lp->mii.phy_id; int bmcr, cfg1; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); /* Enter Link Disable state */ cfg1 = smc_phy_read(dev, phyaddr, PHY_CFG1_REG); @@ -1044,7 +1043,7 @@ static void smc_phy_configure(struct work_struct *work) int my_ad_caps; /* My Advertised capabilities */ int status; - DBG(3, "%s:smc_program_phy()\n", dev->name); + DBG(3, dev, "smc_program_phy()\n"); spin_lock_irq(&lp->lock); @@ -1055,7 +1054,7 @@ static void smc_phy_configure(struct work_struct *work) goto smc_phy_configure_exit; if (smc_phy_reset(dev, phyaddr)) { - printk("%s: PHY reset timed out\n", dev->name); + netdev_info(dev, "PHY reset timed out\n"); goto smc_phy_configure_exit; } @@ -1082,7 +1081,7 @@ static void smc_phy_configure(struct work_struct *work) my_phy_caps = smc_phy_read(dev, phyaddr, MII_BMSR); if (!(my_phy_caps & BMSR_ANEGCAPABLE)) { - printk(KERN_INFO "Auto negotiation NOT supported\n"); + netdev_info(dev, "Auto negotiation NOT supported\n"); smc_phy_fixed(dev); goto smc_phy_configure_exit; } @@ -1118,8 +1117,8 @@ static void smc_phy_configure(struct work_struct *work) */ status = smc_phy_read(dev, phyaddr, MII_ADVERTISE); - DBG(2, "%s: phy caps=%x\n", dev->name, my_phy_caps); - DBG(2, "%s: phy advertised caps=%x\n", dev->name, my_ad_caps); + DBG(2, dev, "phy caps=%x\n", my_phy_caps); + DBG(2, dev, "phy advertised caps=%x\n", my_ad_caps); /* Restart auto-negotiation process in order to advertise my caps */ smc_phy_write(dev, phyaddr, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART); @@ -1143,7 +1142,7 @@ static void smc_phy_interrupt(struct net_device *dev) int phyaddr = lp->mii.phy_id; int phy18; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); if (lp->phy_type == 0) return; @@ -1179,8 +1178,8 @@ static void smc_10bt_check_media(struct net_device *dev, int init) netif_carrier_on(dev); } if (netif_msg_link(lp)) - printk(KERN_INFO "%s: link %s\n", dev->name, - new_carrier ? "up" : "down"); + netdev_info(dev, "link %s\n", + new_carrier ? "up" : "down"); } } @@ -1211,7 +1210,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) int status, mask, timeout, card_stats; int saved_pointer; - DBG(3, "%s: %s\n", dev->name, __func__); + DBG(3, dev, "%s\n", __func__); spin_lock(&lp->lock); @@ -1230,12 +1229,12 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) do { status = SMC_GET_INT(lp); - DBG(2, "%s: INT 0x%02x MASK 0x%02x MEM 0x%04x FIFO 0x%04x\n", - dev->name, status, mask, - ({ int meminfo; SMC_SELECT_BANK(lp, 0); - meminfo = SMC_GET_MIR(lp); - SMC_SELECT_BANK(lp, 2); meminfo; }), - SMC_GET_FIFO(lp)); + DBG(2, dev, "INT 0x%02x MASK 0x%02x MEM 0x%04x FIFO 0x%04x\n", + status, mask, + ({ int meminfo; SMC_SELECT_BANK(lp, 0); + meminfo = SMC_GET_MIR(lp); + SMC_SELECT_BANK(lp, 2); meminfo; }), + SMC_GET_FIFO(lp)); status &= mask; if (!status) @@ -1243,20 +1242,20 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) if (status & IM_TX_INT) { /* do this before RX as it will free memory quickly */ - DBG(3, "%s: TX int\n", dev->name); + DBG(3, dev, "TX int\n"); smc_tx(dev); SMC_ACK_INT(lp, IM_TX_INT); if (THROTTLE_TX_PKTS) netif_wake_queue(dev); } else if (status & IM_RCV_INT) { - DBG(3, "%s: RX irq\n", dev->name); + DBG(3, dev, "RX irq\n"); smc_rcv(dev); } else if (status & IM_ALLOC_INT) { - DBG(3, "%s: Allocation irq\n", dev->name); + DBG(3, dev, "Allocation irq\n"); tasklet_hi_schedule(&lp->tx_task); mask &= ~IM_ALLOC_INT; } else if (status & IM_TX_EMPTY_INT) { - DBG(3, "%s: TX empty\n", dev->name); + DBG(3, dev, "TX empty\n"); mask &= ~IM_TX_EMPTY_INT; /* update stats */ @@ -1271,10 +1270,10 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) /* multiple collisions */ dev->stats.collisions += card_stats & 0xF; } else if (status & IM_RX_OVRN_INT) { - DBG(1, "%s: RX overrun (EPH_ST 0x%04x)\n", dev->name, - ({ int eph_st; SMC_SELECT_BANK(lp, 0); - eph_st = SMC_GET_EPH_STATUS(lp); - SMC_SELECT_BANK(lp, 2); eph_st; })); + DBG(1, dev, "RX overrun (EPH_ST 0x%04x)\n", + ({ int eph_st; SMC_SELECT_BANK(lp, 0); + eph_st = SMC_GET_EPH_STATUS(lp); + SMC_SELECT_BANK(lp, 2); eph_st; })); SMC_ACK_INT(lp, IM_RX_OVRN_INT); dev->stats.rx_errors++; dev->stats.rx_fifo_errors++; @@ -1285,7 +1284,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) smc_phy_interrupt(dev); } else if (status & IM_ERCV_INT) { SMC_ACK_INT(lp, IM_ERCV_INT); - PRINTK("%s: UNSUPPORTED: ERCV INTERRUPT\n", dev->name); + PRINTK(dev, "UNSUPPORTED: ERCV INTERRUPT\n"); } } while (--timeout); @@ -1296,11 +1295,11 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) #ifndef CONFIG_NET_POLL_CONTROLLER if (timeout == MAX_IRQ_LOOPS) - PRINTK("%s: spurious interrupt (mask = 0x%02x)\n", - dev->name, mask); + PRINTK(dev, "spurious interrupt (mask = 0x%02x)\n", + mask); #endif - DBG(3, "%s: Interrupt done (%d loops)\n", - dev->name, MAX_IRQ_LOOPS - timeout); + DBG(3, dev, "Interrupt done (%d loops)\n", + MAX_IRQ_LOOPS - timeout); /* * We return IRQ_HANDLED unconditionally here even if there was @@ -1333,7 +1332,7 @@ static void smc_timeout(struct net_device *dev) void __iomem *ioaddr = lp->base; int status, mask, eph_st, meminfo, fifo; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); spin_lock_irq(&lp->lock); status = SMC_GET_INT(lp); @@ -1344,9 +1343,8 @@ static void smc_timeout(struct net_device *dev) meminfo = SMC_GET_MIR(lp); SMC_SELECT_BANK(lp, 2); spin_unlock_irq(&lp->lock); - PRINTK( "%s: TX timeout (INT 0x%02x INTMASK 0x%02x " - "MEM 0x%04x FIFO 0x%04x EPH_ST 0x%04x)\n", - dev->name, status, mask, meminfo, fifo, eph_st ); + PRINTK(dev, "TX timeout (INT 0x%02x INTMASK 0x%02x MEM 0x%04x FIFO 0x%04x EPH_ST 0x%04x)\n", + status, mask, meminfo, fifo, eph_st); smc_reset(dev); smc_enable(dev); @@ -1377,10 +1375,10 @@ static void smc_set_multicast_list(struct net_device *dev) unsigned char multicast_table[8]; int update_multicast = 0; - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); if (dev->flags & IFF_PROMISC) { - DBG(2, "%s: RCR_PRMS\n", dev->name); + DBG(2, dev, "RCR_PRMS\n"); lp->rcr_cur_mode |= RCR_PRMS; } @@ -1395,7 +1393,7 @@ static void smc_set_multicast_list(struct net_device *dev) * checked before the table is */ else if (dev->flags & IFF_ALLMULTI || netdev_mc_count(dev) > 16) { - DBG(2, "%s: RCR_ALMUL\n", dev->name); + DBG(2, dev, "RCR_ALMUL\n"); lp->rcr_cur_mode |= RCR_ALMUL; } @@ -1437,7 +1435,7 @@ static void smc_set_multicast_list(struct net_device *dev) /* now, the table can be loaded into the chipset */ update_multicast = 1; } else { - DBG(2, "%s: ~(RCR_PRMS|RCR_ALMUL)\n", dev->name); + DBG(2, dev, "~(RCR_PRMS|RCR_ALMUL)\n"); lp->rcr_cur_mode &= ~(RCR_PRMS | RCR_ALMUL); /* @@ -1470,7 +1468,7 @@ smc_open(struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); /* Setup the default Register Modes */ lp->tcr_cur_mode = TCR_DEFAULT; @@ -1514,7 +1512,7 @@ static int smc_close(struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); - DBG(2, "%s: %s\n", dev->name, __func__); + DBG(2, dev, "%s\n", __func__); netif_stop_queue(dev); netif_carrier_off(dev); @@ -1694,7 +1692,7 @@ static int smc_ethtool_geteeprom(struct net_device *dev, int i; int imax; - DBG(1, "Reading %d bytes at %d(0x%x)\n", + DBG(1, dev, "Reading %d bytes at %d(0x%x)\n", eeprom->len, eeprom->offset, eeprom->offset); imax = smc_ethtool_geteeprom_len(dev); for (i = 0; i < eeprom->len; i += 2) { @@ -1706,7 +1704,7 @@ static int smc_ethtool_geteeprom(struct net_device *dev, ret = smc_read_eeprom_word(dev, offset >> 1, &wbuf); if (ret != 0) return ret; - DBG(2, "Read 0x%x from 0x%x\n", wbuf, offset >> 1); + DBG(2, dev, "Read 0x%x from 0x%x\n", wbuf, offset >> 1); data[i] = (wbuf >> 8) & 0xff; data[i+1] = wbuf & 0xff; } @@ -1719,8 +1717,8 @@ static int smc_ethtool_seteeprom(struct net_device *dev, int i; int imax; - DBG(1, "Writing %d bytes to %d(0x%x)\n", - eeprom->len, eeprom->offset, eeprom->offset); + DBG(1, dev, "Writing %d bytes to %d(0x%x)\n", + eeprom->len, eeprom->offset, eeprom->offset); imax = smc_ethtool_geteeprom_len(dev); for (i = 0; i < eeprom->len; i += 2) { int ret; @@ -1729,7 +1727,7 @@ static int smc_ethtool_seteeprom(struct net_device *dev, if (offset > imax) break; wbuf = (data[i] << 8) | data[i + 1]; - DBG(2, "Writing 0x%x to 0x%x\n", wbuf, offset >> 1); + DBG(2, dev, "Writing 0x%x to 0x%x\n", wbuf, offset >> 1); ret = smc_write_eeprom_word(dev, offset >> 1, wbuf); if (ret != 0) return ret; @@ -1784,7 +1782,7 @@ static int smc_findirq(struct smc_local *lp) int timeout = 20; unsigned long cookie; - DBG(2, "%s: %s\n", CARDNAME, __func__); + DBG(2, dev, "%s: %s\n", CARDNAME, __func__); cookie = probe_irq_on(); @@ -1856,21 +1854,21 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, unsigned long irq_flags) { struct smc_local *lp = netdev_priv(dev); - static int version_printed = 0; int retval; unsigned int val, revision_register; const char *version_string; - DBG(2, "%s: %s\n", CARDNAME, __func__); + DBG(2, dev, "%s: %s\n", CARDNAME, __func__); /* First, see if the high byte is 0x33 */ val = SMC_CURRENT_BANK(lp); - DBG(2, "%s: bank signature probe returned 0x%04x\n", CARDNAME, val); + DBG(2, dev, "%s: bank signature probe returned 0x%04x\n", + CARDNAME, val); if ((val & 0xFF00) != 0x3300) { if ((val & 0xFF) == 0x33) { - printk(KERN_WARNING - "%s: Detected possible byte-swapped interface" - " at IOADDR %p\n", CARDNAME, ioaddr); + netdev_warn(dev, + "%s: Detected possible byte-swapped interface at IOADDR %p\n", + CARDNAME, ioaddr); } retval = -ENODEV; goto err_out; @@ -1897,8 +1895,8 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, val = SMC_GET_BASE(lp); val = ((val & 0x1F00) >> 3) << SMC_IO_SHIFT; if (((unsigned int)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) { - printk("%s: IOADDR %p doesn't match configuration (%x).\n", - CARDNAME, ioaddr, val); + netdev_warn(dev, "%s: IOADDR %p doesn't match configuration (%x).\n", + CARDNAME, ioaddr, val); } /* @@ -1908,21 +1906,19 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, */ SMC_SELECT_BANK(lp, 3); revision_register = SMC_GET_REV(lp); - DBG(2, "%s: revision = 0x%04x\n", CARDNAME, revision_register); + DBG(2, dev, "%s: revision = 0x%04x\n", CARDNAME, revision_register); version_string = chip_ids[ (revision_register >> 4) & 0xF]; if (!version_string || (revision_register & 0xff00) != 0x3300) { /* I don't recognize this chip, so... */ - printk("%s: IO %p: Unrecognized revision register 0x%04x" - ", Contact author.\n", CARDNAME, - ioaddr, revision_register); + netdev_warn(dev, "%s: IO %p: Unrecognized revision register 0x%04x, Contact author.\n", + CARDNAME, ioaddr, revision_register); retval = -ENODEV; goto err_out; } /* At this point I'll assume that the chip is an SMC91x. */ - if (version_printed++ == 0) - printk("%s", version); + pr_info_once("%s\n", version); /* fill in some of the fields */ dev->base_addr = (unsigned long)ioaddr; @@ -1940,7 +1936,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, /* * If dev->irq is 0, then the device has to be banged on to see * what the IRQ is. - * + * * This banging doesn't always detect the IRQ, for unknown reasons. * a workaround is to reset the chip and try again. * @@ -1965,8 +1961,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, } } if (dev->irq == 0) { - printk("%s: Couldn't autodetect your IRQ. Use irq=xx.\n", - dev->name); + netdev_warn(dev, "Couldn't autodetect your IRQ. Use irq=xx.\n"); retval = -ENODEV; goto err_out; } @@ -2030,32 +2025,31 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, retval = register_netdev(dev); if (retval == 0) { /* now, print out the card info, in a short format.. */ - printk("%s: %s (rev %d) at %p IRQ %d", - dev->name, version_string, revision_register & 0x0f, - lp->base, dev->irq); + netdev_info(dev, "%s (rev %d) at %p IRQ %d", + version_string, revision_register & 0x0f, + lp->base, dev->irq); if (dev->dma != (unsigned char)-1) - printk(" DMA %d", dev->dma); + pr_cont(" DMA %d", dev->dma); - printk("%s%s\n", + pr_cont("%s%s\n", lp->cfg.flags & SMC91X_NOWAIT ? " [nowait]" : "", THROTTLE_TX_PKTS ? " [throttle_tx]" : ""); if (!is_valid_ether_addr(dev->dev_addr)) { - printk("%s: Invalid ethernet MAC address. Please " - "set using ifconfig\n", dev->name); + netdev_warn(dev, "Invalid ethernet MAC address. Please set using ifconfig\n"); } else { /* Print the Ethernet address */ - printk("%s: Ethernet addr: %pM\n", - dev->name, dev->dev_addr); + netdev_info(dev, "Ethernet addr: %pM\n", + dev->dev_addr); } if (lp->phy_type == 0) { - PRINTK("%s: No PHY found\n", dev->name); + PRINTK(dev, "No PHY found\n"); } else if ((lp->phy_type & 0xfffffff0) == 0x0016f840) { - PRINTK("%s: PHY LAN83C183 (LAN91C111 Internal)\n", dev->name); + PRINTK(dev, "PHY LAN83C183 (LAN91C111 Internal)\n"); } else if ((lp->phy_type & 0xfffffff0) == 0x02821c50) { - PRINTK("%s: PHY LAN83C180\n", dev->name); + PRINTK(dev, "PHY LAN83C180\n"); } } @@ -2165,7 +2159,8 @@ static inline void smc_request_datacs(struct platform_device *pdev, struct net_d return; if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) { - printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME); + netdev_info(ndev, "%s: failed to request datacs memory region.\n", + CARDNAME); return; } @@ -2307,7 +2302,7 @@ static int smc_drv_probe(struct platform_device *pdev) out_free_netdev: free_netdev(ndev); out: - printk("%s: not found (%d).\n", CARDNAME, ret); + pr_info("%s: not found (%d).\n", CARDNAME, ret); return ret; } diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 98eedb90cdc3..c9d4c872e81d 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -907,8 +907,8 @@ static const char * chip_ids[ 16 ] = { ({ \ int __b = SMC_CURRENT_BANK(lp); \ if (unlikely((__b & ~0xf0) != (0x3300 | bank))) { \ - printk( "%s: bank reg screwed (0x%04x)\n", \ - CARDNAME, __b ); \ + pr_err("%s: bank reg screwed (0x%04x)\n", \ + CARDNAME, __b); \ BUG(); \ } \ reg<<SMC_IO_SHIFT; \ diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 01f8459c3213..8564f23a6796 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2167,7 +2167,7 @@ static int smsc911x_init(struct net_device *dev) udelay(1000); if (to == 0) { - pr_err("Device not READY in 100ms aborting\n"); + netdev_err(dev, "Device not READY in 100ms aborting\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index e55e3365a306..059bcafc5e62 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -99,17 +99,17 @@ MODULE_PARM_DESC(debug, "debug level"); #define smsc_dbg(TYPE, f, a...) \ do { if ((pd)->msg_enable & NETIF_MSG_##TYPE) \ - printk(KERN_DEBUG PFX f "\n", ## a); \ + netdev_dbg((pd)->dev, PFX f "\n", ## a); \ } while (0) #define smsc_info(TYPE, f, a...) \ do { if ((pd)->msg_enable & NETIF_MSG_##TYPE) \ - printk(KERN_INFO PFX f "\n", ## a); \ + netdev_info((pd)->dev, PFX f "\n", ## a); \ } while (0) #define smsc_warn(TYPE, f, a...) \ do { if ((pd)->msg_enable & NETIF_MSG_##TYPE) \ - printk(KERN_WARNING PFX f "\n", ## a); \ + netdev_warn((pd)->dev, PFX f "\n", ## a); \ } while (0) static inline u32 smsc9420_reg_read(struct smsc9420_pdata *pd, u32 offset) @@ -1168,7 +1168,7 @@ static int smsc9420_mii_probe(struct net_device *dev) /* Device only supports internal PHY at address 1 */ if (!pd->mii_bus->phy_map[1]) { - pr_err("%s: no PHY found at address 1\n", dev->name); + netdev_err(dev, "no PHY found at address 1\n"); return -ENODEV; } @@ -1180,12 +1180,12 @@ static int smsc9420_mii_probe(struct net_device *dev) smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { - pr_err("%s: Could not attach to PHY\n", dev->name); + netdev_err(dev, "Could not attach to PHY\n"); return PTR_ERR(phydev); } - pr_info("%s: attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - dev->name, phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", + phydev->drv->name, dev_name(&phydev->dev), phydev->irq); /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | @@ -1582,12 +1582,12 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id) int result = 0; u32 id_rev; - printk(KERN_INFO DRV_DESCRIPTION " version " DRV_VERSION "\n"); + pr_info(DRV_DESCRIPTION " version " DRV_VERSION "\n"); /* First do the PCI initialisation */ result = pci_enable_device(pdev); if (unlikely(result)) { - printk(KERN_ERR "Cannot enable smsc9420\n"); + pr_err("Cannot enable smsc9420\n"); goto out_0; } @@ -1600,24 +1600,24 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id) SET_NETDEV_DEV(dev, &pdev->dev); if (!(pci_resource_flags(pdev, SMSC_BAR) & IORESOURCE_MEM)) { - printk(KERN_ERR "Cannot find PCI device base address\n"); + netdev_err(dev, "Cannot find PCI device base address\n"); goto out_free_netdev_2; } if ((pci_request_regions(pdev, DRV_NAME))) { - printk(KERN_ERR "Cannot obtain PCI resources, aborting.\n"); + netdev_err(dev, "Cannot obtain PCI resources, aborting.\n"); goto out_free_netdev_2; } if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - printk(KERN_ERR "No usable DMA configuration, aborting.\n"); + netdev_err(dev, "No usable DMA configuration, aborting.\n"); goto out_free_regions_3; } virt_addr = ioremap(pci_resource_start(pdev, SMSC_BAR), pci_resource_len(pdev, SMSC_BAR)); if (!virt_addr) { - printk(KERN_ERR "Cannot map device registers, aborting.\n"); + netdev_err(dev, "Cannot map device registers, aborting.\n"); goto out_free_regions_3; } diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index a8ef4c4b94be..ba2f5e710af1 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -312,6 +312,7 @@ static ssize_t store_enabled(struct netconsole_target *nt, const char *buf, size_t count) { + unsigned long flags; int enabled; int err; @@ -326,9 +327,7 @@ static ssize_t store_enabled(struct netconsole_target *nt, return -EINVAL; } - mutex_lock(&nt->mutex); if (enabled) { /* 1 */ - /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. @@ -336,19 +335,22 @@ static ssize_t store_enabled(struct netconsole_target *nt, netpoll_print_options(&nt->np); err = netpoll_setup(&nt->np); - if (err) { - mutex_unlock(&nt->mutex); + if (err) return err; - } - - pr_info("network logging started\n"); + pr_info("netconsole: network logging started\n"); } else { /* 0 */ + /* We need to disable the netconsole before cleaning it up + * otherwise we might end up in write_msg() with + * nt->np.dev == NULL and nt->enabled == 1 + */ + spin_lock_irqsave(&target_list_lock, flags); + nt->enabled = 0; + spin_unlock_irqrestore(&target_list_lock, flags); netpoll_cleanup(&nt->np); } nt->enabled = enabled; - mutex_unlock(&nt->mutex); return strnlen(buf, count); } @@ -559,8 +561,10 @@ static ssize_t netconsole_target_attr_store(struct config_item *item, struct netconsole_target_attr *na = container_of(attr, struct netconsole_target_attr, attr); + mutex_lock(&nt->mutex); if (na->store) ret = na->store(nt, buf, count); + mutex_unlock(&nt->mutex); return ret; } diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 846cc19c04f2..8e8d0fcd4979 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -78,7 +78,6 @@ #define AX_MEDIUM_STATUS_MODE 0x22 #define AX_MEDIUM_GIGAMODE 0x01 #define AX_MEDIUM_FULL_DUPLEX 0x02 - #define AX_MEDIUM_ALWAYS_ONE 0x04 #define AX_MEDIUM_EN_125MHZ 0x08 #define AX_MEDIUM_RXFLOW_CTRLEN 0x10 #define AX_MEDIUM_TXFLOW_CTRLEN 0x20 @@ -1065,8 +1064,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) /* Configure default medium type => giga */ *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | - AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX | + AX_MEDIUM_GIGAMODE; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, 2, 2, tmp16); @@ -1225,7 +1224,7 @@ static int ax88179_link_reset(struct usbnet *dev) } mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE; + AX_MEDIUM_RXFLOW_CTRLEN; ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS, 1, 1, &link_sts); @@ -1339,8 +1338,8 @@ static int ax88179_reset(struct usbnet *dev) /* Configure default medium type => giga */ *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | - AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX | + AX_MEDIUM_GIGAMODE; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, 2, 2, tmp16); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 25ba7eca9a13..c9f3281506af 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -21,6 +21,8 @@ #include <linux/usb/usbnet.h> #include <linux/usb/cdc-wdm.h> #include <linux/usb/cdc_ncm.h> +#include <net/ipv6.h> +#include <net/addrconf.h> /* driver specific data - must match cdc_ncm usage */ struct cdc_mbim_state { @@ -42,13 +44,11 @@ static int cdc_mbim_manage_power(struct usbnet *dev, int on) if ((on && atomic_add_return(1, &info->pmcount) == 1) || (!on && atomic_dec_and_test(&info->pmcount))) { /* need autopm_get/put here to ensure the usbcore sees the new value */ rv = usb_autopm_get_interface(dev->intf); - if (rv < 0) - goto err; dev->intf->needs_remote_wakeup = on; - usb_autopm_put_interface(dev->intf); + if (!rv) + usb_autopm_put_interface(dev->intf); } -err: - return rv; + return 0; } static int cdc_mbim_wdm_manage_power(struct usb_interface *intf, int status) @@ -173,7 +173,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb } spin_lock_bh(&ctx->mtx); - skb_out = cdc_ncm_fill_tx_frame(ctx, skb, sign); + skb_out = cdc_ncm_fill_tx_frame(dev, skb, sign); spin_unlock_bh(&ctx->mtx); return skb_out; @@ -184,6 +184,60 @@ error: return NULL; } +/* Some devices are known to send Neigbor Solicitation messages and + * require Neigbor Advertisement replies. The IPv6 core will not + * respond since IFF_NOARP is set, so we must handle them ourselves. + */ +static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) +{ + struct ipv6hdr *iph = (void *)buf; + struct nd_msg *msg = (void *)(iph + 1); + struct net_device *netdev; + struct inet6_dev *in6_dev; + bool is_router; + + /* we'll only respond to requests from unicast addresses to + * our solicited node addresses. + */ + if (!ipv6_addr_is_solict_mult(&iph->daddr) || + !(ipv6_addr_type(&iph->saddr) & IPV6_ADDR_UNICAST)) + return; + + /* need to send the NA on the VLAN dev, if any */ + if (tci) + netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q), + tci); + else + netdev = dev->net; + if (!netdev) + return; + + in6_dev = in6_dev_get(netdev); + if (!in6_dev) + return; + is_router = !!in6_dev->cnf.forwarding; + in6_dev_put(in6_dev); + + /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ + ipv6_stub->ndisc_send_na(netdev, NULL, &iph->saddr, &msg->target, + is_router /* router */, + true /* solicited */, + false /* override */, + true /* inc_opt */); +} + +static bool is_neigh_solicit(u8 *buf, size_t len) +{ + struct ipv6hdr *iph = (void *)buf; + struct nd_msg *msg = (void *)(iph + 1); + + return (len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) && + iph->nexthdr == IPPROTO_ICMPV6 && + msg->icmph.icmp6_code == 0 && + msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION); +} + + static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_t len, u16 tci) { __be16 proto = htons(ETH_P_802_3); @@ -198,6 +252,8 @@ static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_ proto = htons(ETH_P_IP); break; case 0x60: + if (is_neigh_solicit(buf, len)) + do_neigh_solicit(dev, buf, tci); proto = htons(ETH_P_IPV6); break; default: @@ -313,15 +369,13 @@ error: static int cdc_mbim_suspend(struct usb_interface *intf, pm_message_t message) { - int ret = 0; + int ret = -ENODEV; struct usbnet *dev = usb_get_intfdata(intf); struct cdc_mbim_state *info = (void *)&dev->data; struct cdc_ncm_ctx *ctx = info->ctx; - if (ctx == NULL) { - ret = -1; + if (!ctx) goto error; - } /* * Both usbnet_suspend() and subdriver->suspend() MUST return 0 @@ -354,7 +408,7 @@ static int cdc_mbim_resume(struct usb_interface *intf) if (ret < 0) goto err; ret = usbnet_resume(intf); - if (ret < 0 && callsub && info->subdriver->suspend) + if (ret < 0 && callsub) info->subdriver->suspend(intf, PMSG_SUSPEND); err: return ret; @@ -371,9 +425,18 @@ static const struct driver_info cdc_mbim_info = { }; /* MBIM and NCM devices should not need a ZLP after NTBs with - * dwNtbOutMaxSize length. This driver_info is for the exceptional - * devices requiring it anyway, allowing them to be supported without - * forcing the performance penalty on all the sane devices. + * dwNtbOutMaxSize length. Nevertheless, a number of devices from + * different vendor IDs will fail unless we send ZLPs, forcing us + * to make this the default. + * + * This default may cause a performance penalty for spec conforming + * devices wanting to take advantage of optimizations possible without + * ZLPs. A whitelist is added in an attempt to avoid this for devices + * known to conform to the MBIM specification. + * + * All known devices supporting NCM compatibility mode are also + * conforming to the NCM and MBIM specifications. For this reason, the + * NCM subclass entry is also in the ZLP whitelist. */ static const struct driver_info cdc_mbim_info_zlp = { .description = "CDC MBIM", @@ -396,16 +459,13 @@ static const struct usb_device_id mbim_devs[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, - /* Sierra Wireless MC7710 need ZLPs */ - { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68a2, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long)&cdc_mbim_info_zlp, - }, - /* HP hs2434 Mobile Broadband Module needs ZLPs */ - { USB_DEVICE_AND_INTERFACE_INFO(0x3f0, 0x4b1d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long)&cdc_mbim_info_zlp, + /* ZLP conformance whitelist: All Ericsson MBIM devices */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info, }, + /* default entry */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long)&cdc_mbim_info, + .driver_info = (unsigned long)&cdc_mbim_info_zlp, }, { }, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 43afde8f48d2..11c703337577 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -53,8 +53,6 @@ #include <linux/usb/cdc.h> #include <linux/usb/cdc_ncm.h> -#define DRIVER_VERSION "14-Mar-2012" - #if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) static bool prefer_mbim = true; #else @@ -68,71 +66,67 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; -static void -cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) -{ - struct usbnet *dev = netdev_priv(net); - - strlcpy(info->driver, dev->driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, dev->driver_info->description, - sizeof(info->fw_version)); - usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); -} - -static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) +static u8 cdc_ncm_setup(struct usbnet *dev) { + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + struct usb_cdc_ncm_ntb_parameters ncm_parm; u32 val; u8 flags; u8 iface_no; int err; int eth_hlen; u16 ntb_fmt_supported; - u32 min_dgram_size; - u32 min_hdr_size; - struct usbnet *dev = netdev_priv(ctx->netdev); + __le16 max_datagram_size; iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS, USB_TYPE_CLASS | USB_DIR_IN |USB_RECIP_INTERFACE, - 0, iface_no, &ctx->ncm_parm, - sizeof(ctx->ncm_parm)); + 0, iface_no, &ncm_parm, + sizeof(ncm_parm)); if (err < 0) { - pr_debug("failed GET_NTB_PARAMETERS\n"); - return 1; + dev_err(&dev->intf->dev, "failed GET_NTB_PARAMETERS\n"); + return err; /* GET_NTB_PARAMETERS is required */ } /* read correct set of parameters according to device mode */ - ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize); - ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize); - ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder); - ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor); - ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment); + ctx->rx_max = le32_to_cpu(ncm_parm.dwNtbInMaxSize); + ctx->tx_max = le32_to_cpu(ncm_parm.dwNtbOutMaxSize); + ctx->tx_remainder = le16_to_cpu(ncm_parm.wNdpOutPayloadRemainder); + ctx->tx_modulus = le16_to_cpu(ncm_parm.wNdpOutDivisor); + ctx->tx_ndp_modulus = le16_to_cpu(ncm_parm.wNdpOutAlignment); /* devices prior to NCM Errata shall set this field to zero */ - ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams); - ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported); + ctx->tx_max_datagrams = le16_to_cpu(ncm_parm.wNtbOutMaxDatagrams); + ntb_fmt_supported = le16_to_cpu(ncm_parm.bmNtbFormatsSupported); - eth_hlen = ETH_HLEN; - min_dgram_size = CDC_NCM_MIN_DATAGRAM_SIZE; - min_hdr_size = CDC_NCM_MIN_HDR_SIZE; - if (ctx->mbim_desc != NULL) { - flags = ctx->mbim_desc->bmNetworkCapabilities; + /* there are some minor differences in NCM and MBIM defaults */ + if (cdc_ncm_comm_intf_is_mbim(ctx->control->cur_altsetting)) { + if (!ctx->mbim_desc) + return -EINVAL; eth_hlen = 0; - min_dgram_size = CDC_MBIM_MIN_DATAGRAM_SIZE; - min_hdr_size = 0; - } else if (ctx->func_desc != NULL) { - flags = ctx->func_desc->bmNetworkCapabilities; + flags = ctx->mbim_desc->bmNetworkCapabilities; + ctx->max_datagram_size = le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize); + if (ctx->max_datagram_size < CDC_MBIM_MIN_DATAGRAM_SIZE) + ctx->max_datagram_size = CDC_MBIM_MIN_DATAGRAM_SIZE; } else { - flags = 0; + if (!ctx->func_desc) + return -EINVAL; + eth_hlen = ETH_HLEN; + flags = ctx->func_desc->bmNetworkCapabilities; + ctx->max_datagram_size = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); + if (ctx->max_datagram_size < CDC_NCM_MIN_DATAGRAM_SIZE) + ctx->max_datagram_size = CDC_NCM_MIN_DATAGRAM_SIZE; } - pr_debug("dwNtbInMaxSize=%u dwNtbOutMaxSize=%u " - "wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u " - "wNdpOutAlignment=%u wNtbOutMaxDatagrams=%u flags=0x%x\n", - ctx->rx_max, ctx->tx_max, ctx->tx_remainder, ctx->tx_modulus, - ctx->tx_ndp_modulus, ctx->tx_max_datagrams, flags); + /* common absolute max for NCM and MBIM */ + if (ctx->max_datagram_size > CDC_NCM_MAX_DATAGRAM_SIZE) + ctx->max_datagram_size = CDC_NCM_MAX_DATAGRAM_SIZE; + + dev_dbg(&dev->intf->dev, + "dwNtbInMaxSize=%u dwNtbOutMaxSize=%u wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u wNdpOutAlignment=%u wNtbOutMaxDatagrams=%u flags=0x%x\n", + ctx->rx_max, ctx->tx_max, ctx->tx_remainder, ctx->tx_modulus, + ctx->tx_ndp_modulus, ctx->tx_max_datagrams, flags); /* max count of tx datagrams */ if ((ctx->tx_max_datagrams == 0) || @@ -141,19 +135,19 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) /* verify maximum size of received NTB in bytes */ if (ctx->rx_max < USB_CDC_NCM_NTB_MIN_IN_SIZE) { - pr_debug("Using min receive length=%d\n", - USB_CDC_NCM_NTB_MIN_IN_SIZE); + dev_dbg(&dev->intf->dev, "Using min receive length=%d\n", + USB_CDC_NCM_NTB_MIN_IN_SIZE); ctx->rx_max = USB_CDC_NCM_NTB_MIN_IN_SIZE; } if (ctx->rx_max > CDC_NCM_NTB_MAX_SIZE_RX) { - pr_debug("Using default maximum receive length=%d\n", - CDC_NCM_NTB_MAX_SIZE_RX); + dev_dbg(&dev->intf->dev, "Using default maximum receive length=%d\n", + CDC_NCM_NTB_MAX_SIZE_RX); ctx->rx_max = CDC_NCM_NTB_MAX_SIZE_RX; } /* inform device about NTB input size changes */ - if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { + if (ctx->rx_max != le32_to_cpu(ncm_parm.dwNtbInMaxSize)) { __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE, @@ -161,16 +155,22 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) | USB_RECIP_INTERFACE, 0, iface_no, &dwNtbInMaxSize, 4); if (err < 0) - pr_debug("Setting NTB Input Size failed\n"); + dev_dbg(&dev->intf->dev, "Setting NTB Input Size failed\n"); } /* verify maximum size of transmitted NTB in bytes */ - if ((ctx->tx_max < - (min_hdr_size + min_dgram_size)) || - (ctx->tx_max > CDC_NCM_NTB_MAX_SIZE_TX)) { - pr_debug("Using default maximum transmit length=%d\n", - CDC_NCM_NTB_MAX_SIZE_TX); + if (ctx->tx_max > CDC_NCM_NTB_MAX_SIZE_TX) { + dev_dbg(&dev->intf->dev, "Using default maximum transmit length=%d\n", + CDC_NCM_NTB_MAX_SIZE_TX); ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX; + + /* Adding a pad byte here simplifies the handling in + * cdc_ncm_fill_tx_frame, by making tx_max always + * represent the real skb max size. + */ + if (ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) + ctx->tx_max++; + } /* @@ -183,7 +183,7 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) || (val != ((-val) & val)) || (val >= ctx->tx_max)) { - pr_debug("Using default alignment: 4 bytes\n"); + dev_dbg(&dev->intf->dev, "Using default alignment: 4 bytes\n"); ctx->tx_ndp_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE; } @@ -197,13 +197,13 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) || (val != ((-val) & val)) || (val >= ctx->tx_max)) { - pr_debug("Using default transmit modulus: 4 bytes\n"); + dev_dbg(&dev->intf->dev, "Using default transmit modulus: 4 bytes\n"); ctx->tx_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE; } /* verify the payload remainder */ if (ctx->tx_remainder >= ctx->tx_modulus) { - pr_debug("Using default transmit remainder: 0 bytes\n"); + dev_dbg(&dev->intf->dev, "Using default transmit remainder: 0 bytes\n"); ctx->tx_remainder = 0; } @@ -221,7 +221,7 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) USB_CDC_NCM_CRC_NOT_APPENDED, iface_no, NULL, 0); if (err < 0) - pr_debug("Setting CRC mode off failed\n"); + dev_dbg(&dev->intf->dev, "Setting CRC mode off failed\n"); } /* set NTB format, if both formats are supported */ @@ -232,69 +232,43 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) USB_CDC_NCM_NTB16_FORMAT, iface_no, NULL, 0); if (err < 0) - pr_debug("Setting NTB format to 16-bit failed\n"); + dev_dbg(&dev->intf->dev, "Setting NTB format to 16-bit failed\n"); } - ctx->max_datagram_size = min_dgram_size; + /* inform the device about the selected Max Datagram Size */ + if (!(flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE)) + goto out; - /* set Max Datagram Size (MTU) */ - if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) { - __le16 max_datagram_size; - u16 eth_max_sz; - if (ctx->ether_desc != NULL) - eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); - else if (ctx->mbim_desc != NULL) - eth_max_sz = le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize); - else - goto max_dgram_err; - - err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, - USB_TYPE_CLASS | USB_DIR_IN - | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); - if (err < 0) { - pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n", - min_dgram_size); - } else { - ctx->max_datagram_size = - le16_to_cpu(max_datagram_size); - /* Check Eth descriptor value */ - if (ctx->max_datagram_size > eth_max_sz) - ctx->max_datagram_size = eth_max_sz; - - if (ctx->max_datagram_size > CDC_NCM_MAX_DATAGRAM_SIZE) - ctx->max_datagram_size = CDC_NCM_MAX_DATAGRAM_SIZE; - - if (ctx->max_datagram_size < min_dgram_size) - ctx->max_datagram_size = min_dgram_size; - - /* if value changed, update device */ - if (ctx->max_datagram_size != - le16_to_cpu(max_datagram_size)) { - err = usbnet_write_cmd(dev, - USB_CDC_SET_MAX_DATAGRAM_SIZE, - USB_TYPE_CLASS | USB_DIR_OUT - | USB_RECIP_INTERFACE, - 0, - iface_no, &max_datagram_size, - 2); - if (err < 0) - pr_debug("SET_MAX_DGRAM_SIZE failed\n"); - } - } + /* read current mtu value from device */ + err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, + USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, + 0, iface_no, &max_datagram_size, 2); + if (err < 0) { + dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); + goto out; } -max_dgram_err: - if (ctx->netdev->mtu != (ctx->max_datagram_size - eth_hlen)) - ctx->netdev->mtu = ctx->max_datagram_size - eth_hlen; + if (le16_to_cpu(max_datagram_size) == ctx->max_datagram_size) + goto out; + max_datagram_size = cpu_to_le16(ctx->max_datagram_size); + err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, + USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, + 0, iface_no, &max_datagram_size, 2); + if (err < 0) + dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); + +out: + /* set MTU to max supported by the device if necessary */ + if (dev->net->mtu > ctx->max_datagram_size - eth_hlen) + dev->net->mtu = ctx->max_datagram_size - eth_hlen; return 0; } static void -cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) +cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf) { - struct usb_host_endpoint *e; + struct usb_host_endpoint *e, *in = NULL, *out = NULL; u8 ep; for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) { @@ -303,18 +277,18 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) { case USB_ENDPOINT_XFER_INT: if (usb_endpoint_dir_in(&e->desc)) { - if (ctx->status_ep == NULL) - ctx->status_ep = e; + if (!dev->status) + dev->status = e; } break; case USB_ENDPOINT_XFER_BULK: if (usb_endpoint_dir_in(&e->desc)) { - if (ctx->in_ep == NULL) - ctx->in_ep = e; + if (!in) + in = e; } else { - if (ctx->out_ep == NULL) - ctx->out_ep = e; + if (!out) + out = e; } break; @@ -322,6 +296,14 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) break; } } + if (in && !dev->in) + dev->in = usb_rcvbulkpipe(dev->udev, + in->desc.bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK); + if (out && !dev->out) + dev->out = usb_sndbulkpipe(dev->udev, + out->desc.bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK); } static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) @@ -342,18 +324,9 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) kfree(ctx); } -static const struct ethtool_ops cdc_ncm_ethtool_ops = { - .get_drvinfo = cdc_ncm_get_drvinfo, - .get_link = usbnet_get_link, - .get_msglevel = usbnet_get_msglevel, - .set_msglevel = usbnet_set_msglevel, - .get_settings = usbnet_get_settings, - .set_settings = usbnet_set_settings, - .nway_reset = usbnet_nway_reset, -}; - int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) { + const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; struct usb_driver *driver; u8 *buf; @@ -367,23 +340,22 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ctx->tx_timer.function = &cdc_ncm_tx_timer_cb; - ctx->bh.data = (unsigned long)ctx; + ctx->bh.data = (unsigned long)dev; ctx->bh.func = cdc_ncm_txpath_bh; atomic_set(&ctx->stop, 0); spin_lock_init(&ctx->mtx); - ctx->netdev = dev->net; /* store ctx pointer in device data field */ dev->data[0] = (unsigned long)ctx; + /* only the control interface can be successfully probed */ + ctx->control = intf; + /* get some pointers */ driver = driver_of(intf); buf = intf->cur_altsetting->extra; len = intf->cur_altsetting->extralen; - ctx->udev = dev->udev; - ctx->intf = intf; - /* parse through descriptors associated with control interface */ while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) { @@ -392,16 +364,18 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ switch (buf[2]) { case USB_CDC_UNION_TYPE: - if (buf[0] < sizeof(*(ctx->union_desc))) + if (buf[0] < sizeof(*union_desc)) break; - ctx->union_desc = - (const struct usb_cdc_union_desc *)buf; - - ctx->control = usb_ifnum_to_if(dev->udev, - ctx->union_desc->bMasterInterface0); + union_desc = (const struct usb_cdc_union_desc *)buf; + /* the master must be the interface we are probing */ + if (intf->cur_altsetting->desc.bInterfaceNumber != + union_desc->bMasterInterface0) { + dev_dbg(&intf->dev, "bogus CDC Union\n"); + goto error; + } ctx->data = usb_ifnum_to_if(dev->udev, - ctx->union_desc->bSlaveInterface0); + union_desc->bSlaveInterface0); break; case USB_CDC_ETHERNET_TYPE: @@ -410,13 +384,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ ctx->ether_desc = (const struct usb_cdc_ether_desc *)buf; - dev->hard_mtu = - le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); - - if (dev->hard_mtu < CDC_NCM_MIN_DATAGRAM_SIZE) - dev->hard_mtu = CDC_NCM_MIN_DATAGRAM_SIZE; - else if (dev->hard_mtu > CDC_NCM_MAX_DATAGRAM_SIZE) - dev->hard_mtu = CDC_NCM_MAX_DATAGRAM_SIZE; break; case USB_CDC_NCM_TYPE: @@ -444,69 +411,71 @@ advance: } /* some buggy devices have an IAD but no CDC Union */ - if (!ctx->union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { - ctx->control = intf; + if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1); dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n"); } /* check if we got everything */ - if ((ctx->control == NULL) || (ctx->data == NULL) || - ((!ctx->mbim_desc) && ((ctx->ether_desc == NULL) || (ctx->control != intf)))) + if (!ctx->data || (!ctx->mbim_desc && !ctx->ether_desc)) { + dev_dbg(&intf->dev, "CDC descriptors missing\n"); goto error; + } /* claim data interface, if different from control */ if (ctx->data != ctx->control) { temp = usb_driver_claim_interface(driver, ctx->data, dev); - if (temp) + if (temp) { + dev_dbg(&intf->dev, "failed to claim data intf\n"); goto error; + } } iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber; /* reset data interface */ temp = usb_set_interface(dev->udev, iface_no, 0); - if (temp) - goto error2; - - /* initialize data interface */ - if (cdc_ncm_setup(ctx)) + if (temp) { + dev_dbg(&intf->dev, "set interface failed\n"); goto error2; + } /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); - if (temp) + if (temp) { + dev_dbg(&intf->dev, "set interface failed\n"); goto error2; + } - cdc_ncm_find_endpoints(ctx, ctx->data); - cdc_ncm_find_endpoints(ctx, ctx->control); - - if ((ctx->in_ep == NULL) || (ctx->out_ep == NULL) || - (ctx->status_ep == NULL)) + cdc_ncm_find_endpoints(dev, ctx->data); + cdc_ncm_find_endpoints(dev, ctx->control); + if (!dev->in || !dev->out || !dev->status) { + dev_dbg(&intf->dev, "failed to collect endpoints\n"); goto error2; + } - dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; + /* initialize data interface */ + if (cdc_ncm_setup(dev)) { + dev_dbg(&intf->dev, "cdc_ncm_setup() failed\n"); + goto error2; + } usb_set_intfdata(ctx->data, dev); usb_set_intfdata(ctx->control, dev); - usb_set_intfdata(ctx->intf, dev); if (ctx->ether_desc) { temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); - if (temp) + if (temp) { + dev_dbg(&intf->dev, "failed to get mac address\n"); goto error2; - dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr); + } + dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr); } - - dev->in = usb_rcvbulkpipe(dev->udev, - ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - dev->out = usb_sndbulkpipe(dev->udev, - ctx->out_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - dev->status = ctx->status_ep; + /* usbnet use these values for sizing tx/rx queues */ + dev->hard_mtu = ctx->tx_max; dev->rx_urb_size = ctx->rx_max; - ctx->tx_speed = ctx->rx_speed = 0; return 0; error2: @@ -517,7 +486,7 @@ error2: error: cdc_ncm_free((struct cdc_ncm_ctx *)dev->data[0]); dev->data[0] = 0; - dev_info(&dev->udev->dev, "bind() failure\n"); + dev_info(&intf->dev, "bind() failure\n"); return -ENODEV; } EXPORT_SYMBOL_GPL(cdc_ncm_bind_common); @@ -553,7 +522,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) ctx->control = NULL; } - usb_set_intfdata(ctx->intf, NULL); + usb_set_intfdata(intf, NULL); cdc_ncm_free(ctx); } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); @@ -662,8 +631,9 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } struct sk_buff * -cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) +cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) { + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; struct usb_cdc_ncm_nth16 *nth16; struct usb_cdc_ncm_ndp16 *ndp16; struct sk_buff *skb_out; @@ -683,11 +653,11 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* allocate a new OUT skb */ if (!skb_out) { - skb_out = alloc_skb((ctx->tx_max + 1), GFP_ATOMIC); + skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC); if (skb_out == NULL) { if (skb != NULL) { dev_kfree_skb_any(skb); - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } goto exit_no_skb; } @@ -725,12 +695,12 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); skb = NULL; - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } else { /* no room for skb - store for later */ if (ctx->tx_rem_skb != NULL) { dev_kfree_skb_any(ctx->tx_rem_skb); - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } ctx->tx_rem_skb = skb; ctx->tx_rem_sign = sign; @@ -763,7 +733,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) if (skb != NULL) { dev_kfree_skb_any(skb); skb = NULL; - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } ctx->tx_curr_frame_num = n; @@ -788,19 +758,20 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } - /* - * If collected data size is less or equal CDC_NCM_MIN_TX_PKT bytes, - * we send buffers as it is. If we get more data, it would be more - * efficient for USB HS mobile device with DMA engine to receive a full - * size NTB, than canceling DMA transfer and receiving a short packet. + /* If collected data size is less or equal CDC_NCM_MIN_TX_PKT + * bytes, we send buffers as it is. If we get more data, it + * would be more efficient for USB HS mobile device with DMA + * engine to receive a full size NTB, than canceling DMA + * transfer and receiving a short packet. + * + * This optimization support is pointless if we end up sending + * a ZLP after full sized NTBs. */ - if (skb_out->len > CDC_NCM_MIN_TX_PKT) - /* final zero padding */ - memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, ctx->tx_max - skb_out->len); - - /* do we need to prevent a ZLP? */ - if (((skb_out->len % le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0) && - (skb_out->len < le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)) && skb_tailroom(skb_out)) + if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && + skb_out->len > CDC_NCM_MIN_TX_PKT) + memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, + ctx->tx_max - skb_out->len); + else if ((skb_out->len % dev->maxpacket) == 0) *skb_put(skb_out, 1) = 0; /* force short packet */ /* set final frame length */ @@ -809,7 +780,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - ctx->netdev->stats.tx_packets += ctx->tx_curr_frame_num; + dev->net->stats.tx_packets += ctx->tx_curr_frame_num; return skb_out; exit_no_skb: @@ -841,18 +812,19 @@ static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer) static void cdc_ncm_txpath_bh(unsigned long param) { - struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)param; + struct usbnet *dev = (struct usbnet *)param; + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; spin_lock_bh(&ctx->mtx); if (ctx->tx_timer_pending != 0) { ctx->tx_timer_pending--; cdc_ncm_tx_timeout_start(ctx); spin_unlock_bh(&ctx->mtx); - } else if (ctx->netdev != NULL) { + } else if (dev->net != NULL) { spin_unlock_bh(&ctx->mtx); - netif_tx_lock_bh(ctx->netdev); - usbnet_start_xmit(NULL, ctx->netdev); - netif_tx_unlock_bh(ctx->netdev); + netif_tx_lock_bh(dev->net); + usbnet_start_xmit(NULL, dev->net); + netif_tx_unlock_bh(dev->net); } else { spin_unlock_bh(&ctx->mtx); } @@ -875,7 +847,7 @@ cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) goto error; spin_lock_bh(&ctx->mtx); - skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); + skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); spin_unlock_bh(&ctx->mtx); return skb_out; @@ -889,6 +861,7 @@ error: /* verify NTB header and return offset of first NDP, or negative error */ int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) { + struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_nth16 *nth16; int len; int ret = -EINVAL; @@ -898,30 +871,33 @@ int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) if (skb_in->len < (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16))) { - pr_debug("frame too short\n"); + netif_dbg(dev, rx_err, dev->net, "frame too short\n"); goto error; } nth16 = (struct usb_cdc_ncm_nth16 *)skb_in->data; - if (le32_to_cpu(nth16->dwSignature) != USB_CDC_NCM_NTH16_SIGN) { - pr_debug("invalid NTH16 signature <%u>\n", - le32_to_cpu(nth16->dwSignature)); + if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { + netif_dbg(dev, rx_err, dev->net, + "invalid NTH16 signature <%#010x>\n", + le32_to_cpu(nth16->dwSignature)); goto error; } len = le16_to_cpu(nth16->wBlockLength); if (len > ctx->rx_max) { - pr_debug("unsupported NTB block length %u/%u\n", len, - ctx->rx_max); + netif_dbg(dev, rx_err, dev->net, + "unsupported NTB block length %u/%u\n", len, + ctx->rx_max); goto error; } if ((ctx->rx_seq + 1) != le16_to_cpu(nth16->wSequence) && - (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && - !((ctx->rx_seq == 0xffff) && !le16_to_cpu(nth16->wSequence))) { - pr_debug("sequence number glitch prev=%d curr=%d\n", - ctx->rx_seq, le16_to_cpu(nth16->wSequence)); + (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && + !((ctx->rx_seq == 0xffff) && !le16_to_cpu(nth16->wSequence))) { + netif_dbg(dev, rx_err, dev->net, + "sequence number glitch prev=%d curr=%d\n", + ctx->rx_seq, le16_to_cpu(nth16->wSequence)); } ctx->rx_seq = le16_to_cpu(nth16->wSequence); @@ -934,18 +910,20 @@ EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_nth16); /* verify NDP header and return number of datagrams, or negative error */ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) { + struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp16 *ndp16; int ret = -EINVAL; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { - pr_debug("invalid NDP offset <%u>\n", ndpoffset); + netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", + ndpoffset); goto error; } ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { - pr_debug("invalid DPT16 length <%u>\n", - le32_to_cpu(ndp16->dwSignature)); + netif_dbg(dev, rx_err, dev->net, "invalid DPT16 length <%u>\n", + le16_to_cpu(ndp16->wLength)); goto error; } @@ -954,9 +932,9 @@ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) sizeof(struct usb_cdc_ncm_dpe16)); ret--; /* we process NDP entries except for the last one */ - if ((sizeof(struct usb_cdc_ncm_ndp16) + ret * (sizeof(struct usb_cdc_ncm_dpe16))) > - skb_in->len) { - pr_debug("Invalid nframes = %d\n", ret); + if ((sizeof(struct usb_cdc_ncm_ndp16) + + ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len) { + netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } @@ -989,9 +967,10 @@ next_ndp: ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); - if (le32_to_cpu(ndp16->dwSignature) != USB_CDC_NCM_NDP16_NOCRC_SIGN) { - pr_debug("invalid DPT16 signature <%u>\n", - le32_to_cpu(ndp16->dwSignature)); + if (ndp16->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)) { + netif_dbg(dev, rx_err, dev->net, + "invalid DPT16 signature <%#010x>\n", + le32_to_cpu(ndp16->dwSignature)); goto err_ndp; } dpe16 = ndp16->dpe16; @@ -1013,9 +992,9 @@ next_ndp: /* sanity checking */ if (((offset + len) > skb_in->len) || (len > ctx->rx_max) || (len < ETH_HLEN)) { - pr_debug("invalid frame detected (ignored)" - "offset[%u]=%u, length=%u, skb=%p\n", - x, offset, len, skb_in); + netif_dbg(dev, rx_err, dev->net, + "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n", + x, offset, len, skb_in); if (!x) goto err_ndp; break; @@ -1042,7 +1021,7 @@ error: } static void -cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx, +cdc_ncm_speed_change(struct usbnet *dev, struct usb_cdc_speed_change *data) { uint32_t rx_speed = le32_to_cpu(data->DLBitRRate); @@ -1052,25 +1031,16 @@ cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx, * Currently the USB-NET API does not support reporting the actual * device speed. Do print it instead. */ - if ((tx_speed != ctx->tx_speed) || (rx_speed != ctx->rx_speed)) { - ctx->tx_speed = tx_speed; - ctx->rx_speed = rx_speed; - - if ((tx_speed > 1000000) && (rx_speed > 1000000)) { - printk(KERN_INFO KBUILD_MODNAME - ": %s: %u mbit/s downlink " - "%u mbit/s uplink\n", - ctx->netdev->name, - (unsigned int)(rx_speed / 1000000U), - (unsigned int)(tx_speed / 1000000U)); - } else { - printk(KERN_INFO KBUILD_MODNAME - ": %s: %u kbit/s downlink " - "%u kbit/s uplink\n", - ctx->netdev->name, - (unsigned int)(rx_speed / 1000U), - (unsigned int)(tx_speed / 1000U)); - } + if ((tx_speed > 1000000) && (rx_speed > 1000000)) { + netif_info(dev, link, dev->net, + "%u mbit/s downlink %u mbit/s uplink\n", + (unsigned int)(rx_speed / 1000000U), + (unsigned int)(tx_speed / 1000000U)); + } else { + netif_info(dev, link, dev->net, + "%u kbit/s downlink %u kbit/s uplink\n", + (unsigned int)(rx_speed / 1000U), + (unsigned int)(tx_speed / 1000U)); } } @@ -1086,7 +1056,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) /* test for split data in 8-byte chunks */ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) { - cdc_ncm_speed_change(ctx, + cdc_ncm_speed_change(dev, (struct usb_cdc_speed_change *)urb->transfer_buffer); return; } @@ -1101,14 +1071,10 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ ctx->connected = le16_to_cpu(event->wValue); - - printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:" - " %sconnected\n", - ctx->netdev->name, ctx->connected ? "" : "dis"); - + netif_info(dev, link, dev->net, + "network connection: %sconnected\n", + ctx->connected ? "" : "dis"); usbnet_link_change(dev, ctx->connected, 0); - if (!ctx->connected) - ctx->tx_speed = ctx->rx_speed = 0; break; case USB_CDC_NOTIFY_SPEED_CHANGE: @@ -1116,8 +1082,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) sizeof(struct usb_cdc_speed_change))) set_bit(EVENT_STS_SPLIT, &dev->flags); else - cdc_ncm_speed_change(ctx, - (struct usb_cdc_speed_change *) &event[1]); + cdc_ncm_speed_change(dev, + (struct usb_cdc_speed_change *)&event[1]); break; default: @@ -1139,22 +1105,6 @@ static int cdc_ncm_check_connect(struct usbnet *dev) return !ctx->connected; } -static int -cdc_ncm_probe(struct usb_interface *udev, const struct usb_device_id *prod) -{ - return usbnet_probe(udev, prod); -} - -static void cdc_ncm_disconnect(struct usb_interface *intf) -{ - struct usbnet *dev = usb_get_intfdata(intf); - - if (dev == NULL) - return; /* already disconnected */ - - usbnet_disconnect(intf); -} - static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, @@ -1265,8 +1215,8 @@ MODULE_DEVICE_TABLE(usb, cdc_devs); static struct usb_driver cdc_ncm_driver = { .name = "cdc_ncm", .id_table = cdc_devs, - .probe = cdc_ncm_probe, - .disconnect = cdc_ncm_disconnect, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = usbnet_resume, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e0a4a2b08e45..23bdd5b9274d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -149,7 +149,7 @@ static const struct net_device_ops qmi_wwan_netdev_ops = { static int qmi_wwan_manage_power(struct usbnet *dev, int on) { struct qmi_wwan_state *info = (void *)&dev->data; - int rv = 0; + int rv; dev_dbg(&dev->intf->dev, "%s() pmcount=%d, on=%d\n", __func__, atomic_read(&info->pmcount), on); @@ -160,13 +160,11 @@ static int qmi_wwan_manage_power(struct usbnet *dev, int on) * the new value */ rv = usb_autopm_get_interface(dev->intf); - if (rv < 0) - goto err; dev->intf->needs_remote_wakeup = on; - usb_autopm_put_interface(dev->intf); + if (!rv) + usb_autopm_put_interface(dev->intf); } -err: - return rv; + return 0; } static int qmi_wwan_cdc_wdm_manage_power(struct usb_interface *intf, int on) @@ -418,7 +416,7 @@ static int qmi_wwan_resume(struct usb_interface *intf) if (ret < 0) goto err; ret = usbnet_resume(intf); - if (ret < 0 && callsub && info->subdriver->suspend) + if (ret < 0 && callsub) info->subdriver->suspend(intf, PMSG_SUSPEND); err: return ret; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 113ee93dbb2e..656a02e28e26 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1160,11 +1160,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); - mutex_lock(&vi->config_lock); - - if (!vi->config_enable) - goto done; - switch(action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1178,8 +1173,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, break; } -done: - mutex_unlock(&vi->config_lock); return NOTIFY_OK; } @@ -1747,6 +1740,8 @@ static int virtnet_freeze(struct virtio_device *vdev) struct virtnet_info *vi = vdev->priv; int i; + unregister_hotcpu_notifier(&vi->nb); + /* Prevent config work handler from accessing the device */ mutex_lock(&vi->config_lock); vi->config_enable = false; @@ -1795,6 +1790,10 @@ static int virtnet_restore(struct virtio_device *vdev) virtnet_set_queues(vi, vi->curr_queue_pairs); rtnl_unlock(); + err = register_hotcpu_notifier(&vi->nb); + if (err) + return err; + return 0; } #endif diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 5bbcb5e3ee0c..388ddf60a66d 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -148,10 +148,6 @@ static int enslave( struct net_device *, struct net_device * ); static int emancipate( struct net_device * ); #endif -#ifdef __i386__ -#define ASM_CRC 1 -#endif - static const char version[] = "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; @@ -1551,88 +1547,6 @@ __setup( "sbni=", sbni_setup ); /* -------------------------------------------------------------------------- */ -#ifdef ASM_CRC - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - register u32 _crc; - _crc = crc; - - __asm__ __volatile__ ( - "xorl %%ebx, %%ebx\n" - "movl %2, %%esi\n" - "movl %3, %%ecx\n" - "movl $crc32tab, %%edi\n" - "shrl $2, %%ecx\n" - "jz 1f\n" - - ".align 4\n" - "0:\n" - "movb %%al, %%bl\n" - "movl (%%esi), %%edx\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "shrl $8, %%edx\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "shrl $8, %%edx\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "movb %%dh, %%dl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "addl $4, %%esi\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jnz 0b\n" - - "1:\n" - "movl %3, %%ecx\n" - "andl $3, %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb (%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb 1(%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb 2(%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - "2:\n" - : "=a" (_crc) - : "0" (_crc), "g" (p), "g" (len) - : "bx", "cx", "dx", "si", "di" - ); - - return _crc; -} - -#else /* ASM_CRC */ - static u32 calc_crc32( u32 crc, u8 *p, u32 len ) { @@ -1642,9 +1556,6 @@ calc_crc32( u32 crc, u8 *p, u32 len ) return crc; } -#endif /* ASM_CRC */ - - static u32 crc32tab[] __attribute__ ((aligned(8))) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 55b8dec86233..08ae01b41c83 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -169,6 +169,7 @@ struct xenvif { unsigned long credit_usec; unsigned long remaining_credit; struct timer_list credit_timeout; + u64 credit_window_start; /* Statistics */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index e4aa26748f80..b78ee10a956a 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -316,8 +316,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->credit_bytes = vif->remaining_credit = ~0UL; vif->credit_usec = 0UL; init_timer(&vif->credit_timeout); - /* Initialize 'expires' now: it's used to track the credit window. */ - vif->credit_timeout.expires = jiffies; + vif->credit_window_start = get_jiffies_64(); dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 828fdab4f1a4..919b6509455c 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1380,9 +1380,8 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { - unsigned long now = jiffies; - unsigned long next_credit = - vif->credit_timeout.expires + + u64 now = get_jiffies_64(); + u64 next_credit = vif->credit_window_start + msecs_to_jiffies(vif->credit_usec / 1000); /* Timer could already be pending in rare cases. */ @@ -1390,8 +1389,8 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) return true; /* Passed the point where we can replenish credit? */ - if (time_after_eq(now, next_credit)) { - vif->credit_timeout.expires = now; + if (time_after_eq64(now, next_credit)) { + vif->credit_window_start = now; tx_add_credit(vif); } @@ -1403,6 +1402,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) tx_credit_callback; mod_timer(&vif->credit_timeout, next_credit); + vif->credit_window_start = next_credit; return true; } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index be12fbfcae10..1ea75236a15f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -552,9 +552,8 @@ static void __ref enable_slot(struct acpiphp_slot *slot) struct acpiphp_func *func; int max, pass; LIST_HEAD(add_list); - int nr_found; - nr_found = acpiphp_rescan_slot(slot); + acpiphp_rescan_slot(slot); max = acpiphp_max_busnr(bus); for (pass = 0; pass < 2; pass++) { list_for_each_entry(dev, &bus->devices, bus_list) { @@ -574,9 +573,6 @@ static void __ref enable_slot(struct acpiphp_slot *slot) } } __pci_bus_assign_resources(bus, &add_list, NULL); - /* Nothing more to do here if there are no new devices on this bus. */ - if (!nr_found && (slot->flags & SLOT_ENABLED)) - return; acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index feab3a5e50b5..757eb0716d45 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -696,7 +696,7 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) while ((pci_device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, pci_device)) != NULL) { - struct blogic_adapter *adapter = adapter; + struct blogic_adapter *host_adapter = adapter; struct blogic_adapter_info adapter_info; enum blogic_isa_ioport mod_ioaddr_req; unsigned char bus; @@ -744,9 +744,9 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) known and enabled, note that the particular Standard ISA I/O Address should not be probed. */ - adapter->io_addr = io_addr; - blogic_intreset(adapter); - if (blogic_cmd(adapter, BLOGIC_INQ_PCI_INFO, NULL, 0, + host_adapter->io_addr = io_addr; + blogic_intreset(host_adapter); + if (blogic_cmd(host_adapter, BLOGIC_INQ_PCI_INFO, NULL, 0, &adapter_info, sizeof(adapter_info)) == sizeof(adapter_info)) { if (adapter_info.isa_port < 6) @@ -762,7 +762,7 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) I/O Address assigned at system initialization. */ mod_ioaddr_req = BLOGIC_IO_DISABLE; - blogic_cmd(adapter, BLOGIC_MOD_IOADDR, &mod_ioaddr_req, + blogic_cmd(host_adapter, BLOGIC_MOD_IOADDR, &mod_ioaddr_req, sizeof(mod_ioaddr_req), NULL, 0); /* For the first MultiMaster Host Adapter enumerated, @@ -779,12 +779,12 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) fetch_localram.offset = BLOGIC_AUTOSCSI_BASE + 45; fetch_localram.count = sizeof(autoscsi_byte45); - blogic_cmd(adapter, BLOGIC_FETCH_LOCALRAM, + blogic_cmd(host_adapter, BLOGIC_FETCH_LOCALRAM, &fetch_localram, sizeof(fetch_localram), &autoscsi_byte45, sizeof(autoscsi_byte45)); - blogic_cmd(adapter, BLOGIC_GET_BOARD_ID, NULL, 0, &id, - sizeof(id)); + blogic_cmd(host_adapter, BLOGIC_GET_BOARD_ID, NULL, 0, + &id, sizeof(id)); if (id.fw_ver_digit1 == '5') force_scan_order = autoscsi_byte45.force_scan_order; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 408a42ef787a..f0d432c139d0 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -771,6 +771,8 @@ static long aac_compat_do_ioctl(struct aac_dev *dev, unsigned cmd, unsigned long static int aac_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) { struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; return aac_compat_do_ioctl(dev, cmd, (unsigned long)arg); } diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 2ef497ebadc0..ee5c1833eb73 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -20,7 +20,7 @@ * | Device Discovery | 0x2095 | 0x2020-0x2022, | * | | | 0x2011-0x2012, | * | | | 0x2016 | - * | Queue Command and IO tracing | 0x3058 | 0x3006-0x300b | + * | Queue Command and IO tracing | 0x3059 | 0x3006-0x300b | * | | | 0x3027-0x3028 | * | | | 0x303d-0x3041 | * | | | 0x302d,0x3033 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index df1b30ba938c..ff9c86b1a0d8 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1957,6 +1957,15 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) que = MSW(sts->handle); req = ha->req_q_map[que]; + /* Check for invalid queue pointer */ + if (req == NULL || + que >= find_first_zero_bit(ha->req_qid_map, ha->max_req_queues)) { + ql_dbg(ql_dbg_io, vha, 0x3059, + "Invalid status handle (0x%x): Bad req pointer. req=%p, " + "que=%u.\n", sts->handle, req, que); + return; + } + /* Validate handle. */ if (handle < req->num_outstanding_cmds) sp = req->outstanding_cmds[handle]; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e62d17d41d4e..5693f6d7eddb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2854,6 +2854,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) gd->events |= DISK_EVENT_MEDIA_CHANGE; } + blk_pm_runtime_init(sdp->request_queue, dev); add_disk(gd); if (sdkp->capacity) sd_dif_config_host(sdkp); @@ -2862,7 +2863,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? "removable " : ""); - blk_pm_runtime_init(sdp->request_queue, dev); scsi_autopm_put_device(sdp); put_device(&sdkp->dev); } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 5cbc4bb1b395..df5e961484e1 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -105,8 +105,11 @@ static int scatter_elem_sz_prev = SG_SCATTER_SZ; static int sg_add(struct device *, struct class_interface *); static void sg_remove(struct device *, struct class_interface *); +static DEFINE_SPINLOCK(sg_open_exclusive_lock); + static DEFINE_IDR(sg_index_idr); -static DEFINE_RWLOCK(sg_index_lock); +static DEFINE_RWLOCK(sg_index_lock); /* Also used to lock + file descriptor list for device */ static struct class_interface sg_interface = { .add_dev = sg_add, @@ -143,7 +146,8 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ - struct list_head sfd_siblings; /* protected by sfd_lock of device */ + /* sfd_siblings is protected by sg_index_lock */ + struct list_head sfd_siblings; struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ @@ -166,12 +170,13 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ typedef struct sg_device { /* holds the state of each scsi generic device */ struct scsi_device *device; + wait_queue_head_t o_excl_wait; /* queue open() when O_EXCL in use */ int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ - spinlock_t sfd_lock; /* protect file descriptor list for device */ + /* sfds is protected by sg_index_lock */ struct list_head sfds; - struct rw_semaphore o_sem; /* exclude open should hold this rwsem */ volatile char detached; /* 0->attached, 1->detached pending removal */ + /* exclude protected by sg_open_exclusive_lock */ char exclude; /* opened for exclusive access */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ struct gendisk *disk; @@ -220,14 +225,35 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE); } +static int get_exclude(Sg_device *sdp) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&sg_open_exclusive_lock, flags); + ret = sdp->exclude; + spin_unlock_irqrestore(&sg_open_exclusive_lock, flags); + return ret; +} + +static int set_exclude(Sg_device *sdp, char val) +{ + unsigned long flags; + + spin_lock_irqsave(&sg_open_exclusive_lock, flags); + sdp->exclude = val; + spin_unlock_irqrestore(&sg_open_exclusive_lock, flags); + return val; +} + static int sfds_list_empty(Sg_device *sdp) { unsigned long flags; int ret; - spin_lock_irqsave(&sdp->sfd_lock, flags); + read_lock_irqsave(&sg_index_lock, flags); ret = list_empty(&sdp->sfds); - spin_unlock_irqrestore(&sdp->sfd_lock, flags); + read_unlock_irqrestore(&sg_index_lock, flags); return ret; } @@ -239,6 +265,7 @@ sg_open(struct inode *inode, struct file *filp) struct request_queue *q; Sg_device *sdp; Sg_fd *sfp; + int res; int retval; nonseekable_open(inode, filp); @@ -267,52 +294,54 @@ sg_open(struct inode *inode, struct file *filp) goto error_out; } - if ((flags & O_EXCL) && (O_RDONLY == (flags & O_ACCMODE))) { - retval = -EPERM; /* Can't lock it with read only access */ - goto error_out; - } - if (flags & O_NONBLOCK) { - if (flags & O_EXCL) { - if (!down_write_trylock(&sdp->o_sem)) { - retval = -EBUSY; - goto error_out; - } - } else { - if (!down_read_trylock(&sdp->o_sem)) { - retval = -EBUSY; - goto error_out; - } + if (flags & O_EXCL) { + if (O_RDONLY == (flags & O_ACCMODE)) { + retval = -EPERM; /* Can't lock it with read only access */ + goto error_out; + } + if (!sfds_list_empty(sdp) && (flags & O_NONBLOCK)) { + retval = -EBUSY; + goto error_out; + } + res = wait_event_interruptible(sdp->o_excl_wait, + ((!sfds_list_empty(sdp) || get_exclude(sdp)) ? 0 : set_exclude(sdp, 1))); + if (res) { + retval = res; /* -ERESTARTSYS because signal hit process */ + goto error_out; + } + } else if (get_exclude(sdp)) { /* some other fd has an exclusive lock on dev */ + if (flags & O_NONBLOCK) { + retval = -EBUSY; + goto error_out; + } + res = wait_event_interruptible(sdp->o_excl_wait, !get_exclude(sdp)); + if (res) { + retval = res; /* -ERESTARTSYS because signal hit process */ + goto error_out; } - } else { - if (flags & O_EXCL) - down_write(&sdp->o_sem); - else - down_read(&sdp->o_sem); } - /* Since write lock is held, no need to check sfd_list */ - if (flags & O_EXCL) - sdp->exclude = 1; /* used by release lock */ - + if (sdp->detached) { + retval = -ENODEV; + goto error_out; + } if (sfds_list_empty(sdp)) { /* no existing opens on this device */ sdp->sgdebug = 0; q = sdp->device->request_queue; sdp->sg_tablesize = queue_max_segments(q); } - sfp = sg_add_sfp(sdp, dev); - if (!IS_ERR(sfp)) + if ((sfp = sg_add_sfp(sdp, dev))) filp->private_data = sfp; - /* retval is already provably zero at this point because of the - * check after retval = scsi_autopm_get_device(sdp->device)) - */ else { - retval = PTR_ERR(sfp); - if (flags & O_EXCL) { - sdp->exclude = 0; /* undo if error */ - up_write(&sdp->o_sem); - } else - up_read(&sdp->o_sem); + set_exclude(sdp, 0); /* undo if error */ + wake_up_interruptible(&sdp->o_excl_wait); + } + retval = -ENOMEM; + goto error_out; + } + retval = 0; error_out: + if (retval) { scsi_autopm_put_device(sdp->device); sdp_put: scsi_device_put(sdp->device); @@ -329,18 +358,13 @@ sg_release(struct inode *inode, struct file *filp) { Sg_device *sdp; Sg_fd *sfp; - int excl; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); - excl = sdp->exclude; - sdp->exclude = 0; - if (excl) - up_write(&sdp->o_sem); - else - up_read(&sdp->o_sem); + set_exclude(sdp, 0); + wake_up_interruptible(&sdp->o_excl_wait); scsi_autopm_put_device(sdp->device); kref_put(&sfp->f_ref, sg_remove_sfp); @@ -1391,9 +1415,8 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) disk->first_minor = k; sdp->disk = disk; sdp->device = scsidp; - spin_lock_init(&sdp->sfd_lock); INIT_LIST_HEAD(&sdp->sfds); - init_rwsem(&sdp->o_sem); + init_waitqueue_head(&sdp->o_excl_wait); sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); @@ -1526,13 +1549,11 @@ static void sg_remove(struct device *cl_dev, struct class_interface *cl_intf) /* Need a write lock to set sdp->detached. */ write_lock_irqsave(&sg_index_lock, iflags); - spin_lock(&sdp->sfd_lock); sdp->detached = 1; list_for_each_entry(sfp, &sdp->sfds, sfd_siblings) { wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } - spin_unlock(&sdp->sfd_lock); write_unlock_irqrestore(&sg_index_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); @@ -2043,7 +2064,7 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN); if (!sfp) - return ERR_PTR(-ENOMEM); + return NULL; init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); @@ -2057,13 +2078,9 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; - spin_lock_irqsave(&sdp->sfd_lock, iflags); - if (sdp->detached) { - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); - return ERR_PTR(-ENODEV); - } + write_lock_irqsave(&sg_index_lock, iflags); list_add_tail(&sfp->sfd_siblings, &sdp->sfds); - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); + write_unlock_irqrestore(&sg_index_lock, iflags); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); if (unlikely(sg_big_buff != def_reserved_size)) sg_big_buff = def_reserved_size; @@ -2113,9 +2130,10 @@ static void sg_remove_sfp(struct kref *kref) struct sg_device *sdp = sfp->parentdp; unsigned long iflags; - spin_lock_irqsave(&sdp->sfd_lock, iflags); + write_lock_irqsave(&sg_index_lock, iflags); list_del(&sfp->sfd_siblings); - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); + write_unlock_irqrestore(&sg_index_lock, iflags); + wake_up_interruptible(&sdp->o_excl_wait); INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext); schedule_work(&sfp->ew.work); @@ -2502,7 +2520,7 @@ static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) return 0; } -/* must be called while holding sg_index_lock and sfd_lock */ +/* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { int k, m, new_interface, blen, usg; @@ -2587,26 +2605,22 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v) read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; - if (sdp) { - spin_lock(&sdp->sfd_lock); - if (!list_empty(&sdp->sfds)) { - struct scsi_device *scsidp = sdp->device; + if (sdp && !list_empty(&sdp->sfds)) { + struct scsi_device *scsidp = sdp->device; - seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); - if (sdp->detached) - seq_printf(s, "detached pending close "); - else - seq_printf - (s, "scsi%d chan=%d id=%d lun=%d em=%d", - scsidp->host->host_no, - scsidp->channel, scsidp->id, - scsidp->lun, - scsidp->host->hostt->emulated); - seq_printf(s, " sg_tablesize=%d excl=%d\n", - sdp->sg_tablesize, sdp->exclude); - sg_proc_debug_helper(s, sdp); - } - spin_unlock(&sdp->sfd_lock); + seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); + if (sdp->detached) + seq_printf(s, "detached pending close "); + else + seq_printf + (s, "scsi%d chan=%d id=%d lun=%d em=%d", + scsidp->host->host_no, + scsidp->channel, scsidp->id, + scsidp->lun, + scsidp->host->hostt->emulated); + seq_printf(s, " sg_tablesize=%d excl=%d\n", + sdp->sg_tablesize, get_exclude(sdp)); + sg_proc_debug_helper(s, sdp); } read_unlock_irqrestore(&sg_index_lock, iflags); return 0; diff --git a/drivers/staging/bcm/Bcmchar.c b/drivers/staging/bcm/Bcmchar.c index f91bc1fdd895..639ba96adb36 100644 --- a/drivers/staging/bcm/Bcmchar.c +++ b/drivers/staging/bcm/Bcmchar.c @@ -1960,6 +1960,7 @@ cntrlEnd: BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, OSAL_DBG, DBG_LVL_ALL, "Called IOCTL_BCM_GET_DEVICE_DRIVER_INFO\n"); + memset(&DevInfo, 0, sizeof(DevInfo)); DevInfo.MaxRDMBufferSize = BUFFER_4K; DevInfo.u32DSDStartOffset = EEPROM_CALPARAM_START; DevInfo.u32RxAlignmentCorrection = 0; diff --git a/drivers/staging/ozwpan/ozcdev.c b/drivers/staging/ozwpan/ozcdev.c index 6ccb64fb0786..6ce0af9977d8 100644 --- a/drivers/staging/ozwpan/ozcdev.c +++ b/drivers/staging/ozwpan/ozcdev.c @@ -155,6 +155,9 @@ static ssize_t oz_cdev_write(struct file *filp, const char __user *buf, struct oz_app_hdr *app_hdr; struct oz_serial_ctx *ctx; + if (count > sizeof(ei->data) - sizeof(*elt) - sizeof(*app_hdr)) + return -EINVAL; + spin_lock_bh(&g_cdev.lock); pd = g_cdev.active_pd; if (pd) diff --git a/drivers/staging/sb105x/sb_pci_mp.c b/drivers/staging/sb105x/sb_pci_mp.c index 23db32f07fd5..a10cdb17038b 100644 --- a/drivers/staging/sb105x/sb_pci_mp.c +++ b/drivers/staging/sb105x/sb_pci_mp.c @@ -1063,7 +1063,7 @@ static int mp_wait_modem_status(struct sb_uart_state *state, unsigned long arg) static int mp_get_count(struct sb_uart_state *state, struct serial_icounter_struct *icnt) { - struct serial_icounter_struct icount; + struct serial_icounter_struct icount = {}; struct sb_uart_icount cnow; struct sb_uart_port *port = state->port; diff --git a/drivers/staging/wlags49_h2/wl_priv.c b/drivers/staging/wlags49_h2/wl_priv.c index c97e0e154d28..7e10dcdc3090 100644 --- a/drivers/staging/wlags49_h2/wl_priv.c +++ b/drivers/staging/wlags49_h2/wl_priv.c @@ -570,6 +570,7 @@ int wvlan_uil_put_info(struct uilreq *urq, struct wl_private *lp) ltv_t *pLtv; bool_t ltvAllocated = FALSE; ENCSTRCT sEncryption; + size_t len; #ifdef USE_WDS hcf_16 hcfPort = HCF_PORT_0; @@ -686,7 +687,8 @@ int wvlan_uil_put_info(struct uilreq *urq, struct wl_private *lp) break; case CFG_CNF_OWN_NAME: memset(lp->StationName, 0, sizeof(lp->StationName)); - memcpy((void *)lp->StationName, (void *)&pLtv->u.u8[2], (size_t)pLtv->u.u16[0]); + len = min_t(size_t, pLtv->u.u16[0], sizeof(lp->StationName)); + strlcpy(lp->StationName, &pLtv->u.u8[2], len); pLtv->u.u16[0] = CNV_INT_TO_LITTLE(pLtv->u.u16[0]); break; case CFG_CNF_LOAD_BALANCING: @@ -1783,6 +1785,7 @@ int wvlan_set_station_nickname(struct net_device *dev, { struct wl_private *lp = wl_priv(dev); unsigned long flags; + size_t len; int ret = 0; /*------------------------------------------------------------------------*/ @@ -1793,8 +1796,8 @@ int wvlan_set_station_nickname(struct net_device *dev, wl_lock(lp, &flags); memset(lp->StationName, 0, sizeof(lp->StationName)); - - memcpy(lp->StationName, extra, wrqu->data.length); + len = min_t(size_t, wrqu->data.length, sizeof(lp->StationName)); + strlcpy(lp->StationName, extra, len); /* Commit the adapter parameters */ wl_apply(lp); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 551c96ca60ac..0f199f6a0738 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -134,10 +134,10 @@ static int pscsi_pmode_enable_hba(struct se_hba *hba, unsigned long mode_flag) * pSCSI Host ID and enable for phba mode */ sh = scsi_host_lookup(phv->phv_host_id); - if (IS_ERR(sh)) { + if (!sh) { pr_err("pSCSI: Unable to locate SCSI Host for" " phv_host_id: %d\n", phv->phv_host_id); - return PTR_ERR(sh); + return -EINVAL; } phv->phv_lld_host = sh; @@ -515,10 +515,10 @@ static int pscsi_configure_device(struct se_device *dev) sh = phv->phv_lld_host; } else { sh = scsi_host_lookup(pdv->pdv_host_id); - if (IS_ERR(sh)) { + if (!sh) { pr_err("pSCSI: Unable to locate" " pdv_host_id: %d\n", pdv->pdv_host_id); - return PTR_ERR(sh); + return -EINVAL; } } } else { diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 4714c6f8da4b..d9b92b2c524d 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -263,6 +263,11 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o sectors, cmd->se_dev->dev_attrib.max_write_same_len); return TCM_INVALID_CDB_FIELD; } + /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */ + if (flags[0] & 0x10) { + pr_warn("WRITE SAME with ANCHOR not supported\n"); + return TCM_INVALID_CDB_FIELD; + } /* * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 3da4fd10b9f8..474cd44fac14 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -82,6 +82,9 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op mutex_lock(&g_device_mutex); list_for_each_entry(se_dev, &g_device_list, g_dev_node) { + if (!se_dev->dev_attrib.emulate_3pc) + continue; + memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); @@ -357,6 +360,7 @@ struct xcopy_pt_cmd { struct se_cmd se_cmd; struct xcopy_op *xcopy_op; struct completion xpt_passthrough_sem; + unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; }; static struct se_port xcopy_pt_port; @@ -675,7 +679,8 @@ static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n", se_cmd->scsi_status); - return 0; + + return (se_cmd->scsi_status) ? -EINVAL : 0; } static int target_xcopy_read_source( @@ -708,7 +713,7 @@ static int target_xcopy_read_source( (unsigned long long)src_lba, src_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, - DMA_FROM_DEVICE, 0, NULL); + DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->src_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], @@ -768,7 +773,7 @@ static int target_xcopy_write_destination( (unsigned long long)dst_lba, dst_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, - DMA_TO_DEVICE, 0, NULL); + DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->dst_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], @@ -884,30 +889,42 @@ out: sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) { + struct se_device *dev = se_cmd->se_dev; struct xcopy_op *xop = NULL; unsigned char *p = NULL, *seg_desc; unsigned int list_id, list_id_usage, sdll, inline_dl, sa; + sense_reason_t ret = TCM_INVALID_PARAMETER_LIST; int rc; unsigned short tdll; + if (!dev->dev_attrib.emulate_3pc) { + pr_err("EXTENDED_COPY operation explicitly disabled\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + sa = se_cmd->t_task_cdb[1] & 0x1f; if (sa != 0x00) { pr_err("EXTENDED_COPY(LID4) not supported\n"); return TCM_UNSUPPORTED_SCSI_OPCODE; } + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); + if (!xop) { + pr_err("Unable to allocate xcopy_op\n"); + return TCM_OUT_OF_RESOURCES; + } + xop->xop_se_cmd = se_cmd; + p = transport_kmap_data_sg(se_cmd); if (!p) { pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n"); + kfree(xop); return TCM_OUT_OF_RESOURCES; } list_id = p[0]; - if (list_id != 0x00) { - pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id); - goto out; - } - list_id_usage = (p[1] & 0x18); + list_id_usage = (p[1] & 0x18) >> 3; + /* * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH */ @@ -920,13 +937,6 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) goto out; } - xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); - if (!xop) { - pr_err("Unable to allocate xcopy_op\n"); - goto out; - } - xop->xop_se_cmd = se_cmd; - pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, tdll, sdll, inline_dl); @@ -935,6 +945,17 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) if (rc <= 0) goto out; + if (xop->src_dev->dev_attrib.block_size != + xop->dst_dev->dev_attrib.block_size) { + pr_err("XCOPY: Non matching src_dev block_size: %u + dst_dev" + " block_size: %u currently unsupported\n", + xop->src_dev->dev_attrib.block_size, + xop->dst_dev->dev_attrib.block_size); + xcopy_pt_undepend_remotedev(xop); + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto out; + } + pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, rc * XCOPY_TARGET_DESC_LEN); seg_desc = &p[16]; @@ -957,7 +978,7 @@ out: if (p) transport_kunmap_data_sg(se_cmd); kfree(xop); - return TCM_INVALID_CDB_FIELD; + return ret; } static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d067285a2d20..6b0f75eac8a2 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1499,7 +1499,7 @@ static void atmel_set_ops(struct uart_port *port) /* * Get ip name usart or uart */ -static int atmel_get_ip_name(struct uart_port *port) +static void atmel_get_ip_name(struct uart_port *port) { struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); int name = UART_GET_IP_NAME(port); @@ -1518,10 +1518,7 @@ static int atmel_get_ip_name(struct uart_port *port) atmel_port->is_usart = false; } else { dev_err(port->dev, "Not supported ip name, set to uart\n"); - return -EINVAL; } - - return 0; } /* @@ -2405,9 +2402,7 @@ static int atmel_serial_probe(struct platform_device *pdev) /* * Get port name of usart or uart */ - ret = atmel_get_ip_name(&port->uart); - if (ret < 0) - goto err_add_port; + atmel_get_ip_name(&port->uart); return 0; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index ba475632c5fa..0e808cf91d97 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -642,16 +642,29 @@ static int uio_mmap_physical(struct vm_area_struct *vma) { struct uio_device *idev = vma->vm_private_data; int mi = uio_find_mem_index(vma); + struct uio_mem *mem; if (mi < 0) return -EINVAL; + mem = idev->info->mem + mi; - vma->vm_ops = &uio_physical_vm_ops; + if (vma->vm_end - vma->vm_start > mem->size) + return -EINVAL; + vma->vm_ops = &uio_physical_vm_ops; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + /* + * We cannot use the vm_iomap_memory() helper here, + * because vma->vm_pgoff is the map index we looked + * up above in uio_find_mem_index(), rather than an + * actual page offset into the mmap. + * + * So we just do the physical mmap without a page + * offset. + */ return remap_pfn_range(vma, vma->vm_start, - idev->info->mem[mi].addr >> PAGE_SHIFT, + mem->addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c45f9c0a1b34..b21d553c245b 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -904,6 +904,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 1b8af461b522..a7019d1e3058 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1307,3 +1307,9 @@ * Manufacturer: Crucible Technologies */ #define FTDI_CT_COMET_PID 0x8e08 + +/* + * Product: Z3X Box + * Manufacturer: Smart GSM Team + */ +#define FTDI_Z3X_PID 0x0011 diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index bedf8e47713b..1e6de4cd079d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -4,11 +4,6 @@ * Copyright (C) 2001-2007 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2003 IBM Corp. * - * Copyright (C) 2009, 2013 Frank Schäfer <fschaefer.oss@googlemail.com> - * - fixes, improvements and documentation for the baud rate encoding methods - * Copyright (C) 2013 Reinhard Max <max@suse.de> - * - fixes and improvements for the divisor based baud rate encoding method - * * Original driver for 2.2.x by anonymous * * This program is free software; you can redistribute it and/or @@ -134,18 +129,10 @@ MODULE_DEVICE_TABLE(usb, id_table); enum pl2303_type { - type_0, /* H version ? */ - type_1, /* H version ? */ - HX_TA, /* HX(A) / X(A) / TA version */ /* TODO: improve */ - HXD_EA_RA_SA, /* HXD / EA / RA / SA version */ /* TODO: improve */ - TB, /* TB version */ - HX_CLONE, /* Cheap and less functional clone of the HX chip */ + type_0, /* don't know the difference between type 0 and */ + type_1, /* type 1, until someone from prolific tells us... */ + HX, /* HX version of the pl2303 chip */ }; -/* - * NOTE: don't know the difference between type 0 and type 1, - * until someone from Prolific tells us... - * TODO: distinguish between X/HX, TA and HXD, EA, RA, SA variants - */ struct pl2303_serial_private { enum pl2303_type type; @@ -185,7 +172,6 @@ static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; enum pl2303_type type = type_0; - char *type_str = "unknown (treating as type_0)"; unsigned char *buf; spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); @@ -198,53 +184,15 @@ static int pl2303_startup(struct usb_serial *serial) return -ENOMEM; } - if (serial->dev->descriptor.bDeviceClass == 0x02) { + if (serial->dev->descriptor.bDeviceClass == 0x02) type = type_0; - type_str = "type_0"; - } else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) { - /* - * NOTE: The bcdDevice version is the only difference between - * the device descriptors of the X/HX, HXD, EA, RA, SA, TA, TB - */ - if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x300) { - /* Check if the device is a clone */ - pl2303_vendor_read(0x9494, 0, serial, buf); - /* - * NOTE: Not sure if this read is really needed. - * The HX returns 0x00, the clone 0x02, but the Windows - * driver seems to ignore the value and continues. - */ - pl2303_vendor_write(0x0606, 0xaa, serial); - pl2303_vendor_read(0x8686, 0, serial, buf); - if (buf[0] != 0xaa) { - type = HX_CLONE; - type_str = "X/HX clone (limited functionality)"; - } else { - type = HX_TA; - type_str = "X/HX/TA"; - } - pl2303_vendor_write(0x0606, 0x00, serial); - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x400) { - type = HXD_EA_RA_SA; - type_str = "HXD/EA/RA/SA"; - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x500) { - type = TB; - type_str = "TB"; - } else { - dev_info(&serial->interface->dev, - "unknown/unsupported device type\n"); - kfree(spriv); - kfree(buf); - return -ENODEV; - } - } else if (serial->dev->descriptor.bDeviceClass == 0x00 - || serial->dev->descriptor.bDeviceClass == 0xFF) { + else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) + type = HX; + else if (serial->dev->descriptor.bDeviceClass == 0x00) type = type_1; - type_str = "type_1"; - } - dev_dbg(&serial->interface->dev, "device type: %s\n", type_str); + else if (serial->dev->descriptor.bDeviceClass == 0xFF) + type = type_1; + dev_dbg(&serial->interface->dev, "device type: %d\n", type); spriv->type = type; usb_set_serial_data(serial, spriv); @@ -259,10 +207,10 @@ static int pl2303_startup(struct usb_serial *serial) pl2303_vendor_read(0x8383, 0, serial, buf); pl2303_vendor_write(0, 1, serial); pl2303_vendor_write(1, 0, serial); - if (type == type_0 || type == type_1) - pl2303_vendor_write(2, 0x24, serial); - else + if (type == HX) pl2303_vendor_write(2, 0x44, serial); + else + pl2303_vendor_write(2, 0x24, serial); kfree(buf); return 0; @@ -316,174 +264,65 @@ static int pl2303_set_control_lines(struct usb_serial_port *port, u8 value) return retval; } -static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, - u8 buf[4]) +static void pl2303_encode_baudrate(struct tty_struct *tty, + struct usb_serial_port *port, + u8 buf[4]) { - /* - * NOTE: Only the values defined in baud_sup are supported ! - * => if unsupported values are set, the PL2303 uses 9600 baud instead - * => HX clones just don't work at unsupported baud rates < 115200 baud, - * for baud rates > 115200 they run at 115200 baud - */ const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, - 4800, 7200, 9600, 14400, 19200, 28800, 38400, - 57600, 115200, 230400, 460800, 614400, 921600, - 1228800, 2457600, 3000000, 6000000, 12000000 }; + 4800, 7200, 9600, 14400, 19200, 28800, 38400, + 57600, 115200, 230400, 460800, 500000, 614400, + 921600, 1228800, 2457600, 3000000, 6000000 }; + + struct usb_serial *serial = port->serial; + struct pl2303_serial_private *spriv = usb_get_serial_data(serial); + int baud; + int i; + /* - * NOTE: With the exception of type_0/1 devices, the following - * additional baud rates are supported (tested with HX rev. 3A only): - * 110*, 56000*, 128000, 134400, 161280, 201600, 256000*, 268800, - * 403200, 806400. (*: not HX and HX clones) - * - * Maximum values: HXD, TB: 12000000; HX, TA: 6000000; - * type_0+1: 1228800; RA: 921600; HX clones, SA: 115200 - * - * As long as we are not using this encoding method for anything else - * than the type_0+1, HX and HX clone chips, there is no point in - * complicating the code to support them. + * NOTE: Only the values defined in baud_sup are supported! + * => if unsupported values are set, the PL2303 seems to use + * 9600 baud (at least my PL2303X always does) */ - int i; + baud = tty_get_baud_rate(tty); + dev_dbg(&port->dev, "baud requested = %d\n", baud); + if (!baud) + return; /* Set baudrate to nearest supported value */ for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { if (baud_sup[i] > baud) break; } + if (i == ARRAY_SIZE(baud_sup)) baud = baud_sup[i - 1]; else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) baud = baud_sup[i - 1]; else baud = baud_sup[i]; - /* Respect the chip type specific baud rate limits */ - /* - * FIXME: as long as we don't know how to distinguish between the - * HXD, EA, RA, and SA chip variants, allow the max. value of 12M. - */ - if (type == HX_TA) - baud = min_t(int, baud, 6000000); - else if (type == type_0 || type == type_1) - baud = min_t(int, baud, 1228800); - else if (type == HX_CLONE) - baud = min_t(int, baud, 115200); - /* Direct (standard) baud rate encoding method */ - put_unaligned_le32(baud, buf); - - return baud; -} -static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, - u8 buf[4]) -{ - /* - * Divisor based baud rate encoding method - * - * NOTE: HX clones do NOT support this method. - * It's not clear if the type_0/1 chips support it. - * - * divisor = 12MHz * 32 / baudrate = 2^A * B - * - * with - * - * A = buf[1] & 0x0e - * B = buf[0] + (buf[1] & 0x01) << 8 - * - * Special cases: - * => 8 < B < 16: device seems to work not properly - * => B <= 8: device uses the max. value B = 512 instead - */ - unsigned int A, B; + /* type_0, type_1 only support up to 1228800 baud */ + if (spriv->type != HX) + baud = min_t(int, baud, 1228800); - /* - * NOTE: The Windows driver allows maximum baud rates of 110% of the - * specified maximium value. - * Quick tests with early (2004) HX (rev. A) chips suggest, that even - * higher baud rates (up to the maximum of 24M baud !) are working fine, - * but that should really be tested carefully in "real life" scenarios - * before removing the upper limit completely. - * Baud rates smaller than the specified 75 baud are definitely working - * fine. - */ - if (type == type_0 || type == type_1) - baud = min_t(int, baud, 1228800 * 1.1); - else if (type == HX_TA) - baud = min_t(int, baud, 6000000 * 1.1); - else if (type == HXD_EA_RA_SA) - /* HXD, EA: 12Mbps; RA: 1Mbps; SA: 115200 bps */ - /* - * FIXME: as long as we don't know how to distinguish between - * these chip variants, allow the max. of these values - */ - baud = min_t(int, baud, 12000000 * 1.1); - else if (type == TB) - baud = min_t(int, baud, 12000000 * 1.1); - /* Determine factors A and B */ - A = 0; - B = 12000000 * 32 / baud; /* 12MHz */ - B <<= 1; /* Add one bit for rounding */ - while (B > (512 << 1) && A <= 14) { - A += 2; - B >>= 2; - } - if (A > 14) { /* max. divisor = min. baudrate reached */ - A = 14; - B = 512; - /* => ~45.78 baud */ + if (baud <= 115200) { + put_unaligned_le32(baud, buf); } else { - B = (B + 1) >> 1; /* Round the last bit */ - } - /* Handle special cases */ - if (B == 512) - B = 0; /* also: 1 to 8 */ - else if (B < 16) /* - * NOTE: With the current algorithm this happens - * only for A=0 and means that the min. divisor - * (respectively: the max. baudrate) is reached. + * Apparently the formula for higher speeds is: + * baudrate = 12M * 32 / (2^buf[1]) / buf[0] */ - B = 16; /* => 24 MBaud */ - /* Encode the baud rate */ - buf[3] = 0x80; /* Select divisor encoding method */ - buf[2] = 0; - buf[1] = (A & 0x0e); /* A */ - buf[1] |= ((B & 0x100) >> 8); /* MSB of B */ - buf[0] = B & 0xff; /* 8 LSBs of B */ - /* Calculate the actual/resulting baud rate */ - if (B <= 8) - B = 512; - baud = 12000000 * 32 / ((1 << A) * B); - - return baud; -} - -static void pl2303_encode_baudrate(struct tty_struct *tty, - struct usb_serial_port *port, - enum pl2303_type type, - u8 buf[4]) -{ - int baud; + unsigned tmp = 12000000 * 32 / baud; + buf[3] = 0x80; + buf[2] = 0; + buf[1] = (tmp >= 256); + while (tmp >= 256) { + tmp >>= 2; + buf[1] <<= 1; + } + buf[0] = tmp; + } - baud = tty_get_baud_rate(tty); - dev_dbg(&port->dev, "baud requested = %d\n", baud); - if (!baud) - return; - /* - * There are two methods for setting/encoding the baud rate - * 1) Direct method: encodes the baud rate value directly - * => supported by all chip types - * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) - * => not supported by HX clones (and likely type_0/1 chips) - * - * NOTE: Although the divisor based baud rate encoding method is much - * more flexible, some of the standard baud rate values can not be - * realized exactly. But the difference is very small (max. 0.2%) and - * the device likely uses the same baud rate generator for both methods - * so that there is likley no difference. - */ - if (type == type_0 || type == type_1 || type == HX_CLONE) - baud = pl2303_baudrate_encode_direct(baud, type, buf); - else - baud = pl2303_baudrate_encode_divisor(baud, type, buf); /* Save resulting baud rate */ tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "baud set = %d\n", baud); @@ -540,8 +379,8 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "data bits = %d\n", buf[6]); } - /* For reference: buf[0]:buf[3] baud rate value */ - pl2303_encode_baudrate(tty, port, spriv->type, buf); + /* For reference buf[0]:buf[3] baud rate value */ + pl2303_encode_baudrate(tty, port, &buf[0]); /* For reference buf[4]=0 is 1 stop bits */ /* For reference buf[4]=1 is 1.5 stop bits */ @@ -618,10 +457,10 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "0xa1:0x21:0:0 %d - %7ph\n", i, buf); if (C_CRTSCTS(tty)) { - if (spriv->type == type_0 || spriv->type == type_1) - pl2303_vendor_write(0x0, 0x41, serial); - else + if (spriv->type == HX) pl2303_vendor_write(0x0, 0x61, serial); + else + pl2303_vendor_write(0x0, 0x41, serial); } else { pl2303_vendor_write(0x0, 0x0, serial); } @@ -658,7 +497,7 @@ static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port) struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int result; - if (spriv->type == type_0 || spriv->type == type_1) { + if (spriv->type != HX) { usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); } else { @@ -833,7 +672,6 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state) result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), BREAK_REQUEST, BREAK_REQUEST_TYPE, state, 0, NULL, 0, 100); - /* NOTE: HX clones don't support sending breaks, -EPIPE is returned */ if (result) dev_err(&port->dev, "error sending break = %d\n", result); } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ce5221fa393a..e663921eebb6 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1056,7 +1056,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (data_direction != DMA_NONE) { ret = vhost_scsi_map_iov_to_sgl(cmd, &vq->iov[data_first], data_num, - data_direction == DMA_TO_DEVICE); + data_direction == DMA_FROM_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); goto err_free; diff --git a/drivers/video/au1100fb.c b/drivers/video/au1100fb.c index a54ccdc4d661..22ad85242e5b 100644 --- a/drivers/video/au1100fb.c +++ b/drivers/video/au1100fb.c @@ -361,37 +361,13 @@ void au1100fb_fb_rotate(struct fb_info *fbi, int angle) int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma) { struct au1100fb_device *fbdev; - unsigned int len; - unsigned long start=0, off; fbdev = to_au1100fb_device(fbi); - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) { - return -EINVAL; - } - - start = fbdev->fb_phys & PAGE_MASK; - len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len); - - off = vma->vm_pgoff << PAGE_SHIFT; - - if ((vma->vm_end - vma->vm_start + off) > len) { - return -EINVAL; - } - - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6 - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - return -EAGAIN; - } - - return 0; + return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len); } static struct fb_ops au1100fb_ops = diff --git a/drivers/video/au1200fb.c b/drivers/video/au1200fb.c index 301224ecc950..1d02897d17f2 100644 --- a/drivers/video/au1200fb.c +++ b/drivers/video/au1200fb.c @@ -1233,34 +1233,13 @@ static int au1200fb_fb_blank(int blank_mode, struct fb_info *fbi) * method mainly to allow the use of the TLB streaming flag (CCA=6) */ static int au1200fb_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) - { - unsigned int len; - unsigned long start=0, off; struct au1200fb_device *fbdev = info->par; - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) { - return -EINVAL; - } - - start = fbdev->fb_phys & PAGE_MASK; - len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len); - - off = vma->vm_pgoff << PAGE_SHIFT; - - if ((vma->vm_end - vma->vm_start + off) > len) { - return -EINVAL; - } - - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pgprot_val(vma->vm_page_prot) |= _CACHE_MASK; /* CCA=7 */ - return io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); + return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len); } static void set_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata) diff --git a/fs/dcache.c b/fs/dcache.c index 20532cb0b06e..ae6ebb88ceff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry * +static struct dentry * dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { @@ -630,7 +630,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c88e355f7635..000eae2782b6 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, struct page *page) { return ecryptfs_lower_header_size(crypt_stat) + - (page->index << PAGE_CACHE_SHIFT); + ((loff_t)page->index << PAGE_CACHE_SHIFT); } /** diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7d52806c2119..4725a07f003c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *payload; + char *payload = NULL; size_t payload_len = 0; int rc; @@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } out: kfree(msg); + kfree(payload); return rc; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 473e09da7d02..810c28fb8c3c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,7 +34,6 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> -#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -1605,8 +1604,7 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!freezable_schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); diff --git a/fs/file_table.c b/fs/file_table.c index abdd15ad13c9..e900ca518635 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -297,7 +297,7 @@ void flush_delayed_fput(void) delayed_fput(NULL); } -static DECLARE_WORK(delayed_fput_work, delayed_fput); +static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { @@ -317,7 +317,7 @@ void fput(struct file *file) } if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) - schedule_work(&delayed_fput_work); + schedule_delayed_work(&delayed_fput_work, 1); } } diff --git a/fs/select.c b/fs/select.c index 35d4adc749d9..dfd5cb18c012 100644 --- a/fs/select.c +++ b/fs/select.c @@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = freezable_schedule_hrtimeout_range(expires, slack, - HRTIMER_MODE_ABS); + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/seq_file.c b/fs/seq_file.c index 3135c2525c76..a290157265ef 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) m->read_pos = offset; retval = file->f_pos = offset; } + } else { + file->f_pos = offset; } } file->f_version = m->version; diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 68267b64bb98..7d275c4fc011 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -11,8 +11,48 @@ extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +/** + * crc32_le_combine - Combine two crc32 check values into one. For two + * sequences of bytes, seq1 and seq2 with lengths len1 + * and len2, crc32_le() check values were calculated + * for each, crc1 and crc2. + * + * @crc1: crc32 of the first block + * @crc2: crc32 of the second block + * @len2: length of the second block + * + * Return: The crc32_le() check value of seq1 and seq2 concatenated, + * requiring only crc1, crc2, and len2. Note: If seq_full denotes + * the concatenated memory area of seq1 with seq2, and crc_full + * the crc32_le() value of seq_full, then crc_full == + * crc32_le_combine(crc1, crc2, len2) when crc_full was seeded + * with the same initializer as crc1, and crc2 seed was 0. See + * also crc32_combine_test(). + */ +extern u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2); + extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); +/** + * __crc32c_le_combine - Combine two crc32c check values into one. For two + * sequences of bytes, seq1 and seq2 with lengths len1 + * and len2, __crc32c_le() check values were calculated + * for each, crc1 and crc2. + * + * @crc1: crc32c of the first block + * @crc2: crc32c of the second block + * @len2: length of the second block + * + * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, + * requiring only crc1, crc2, and len2. Note: If seq_full denotes + * the concatenated memory area of seq1 with seq2, and crc_full + * the __crc32c_le() value of seq_full, then crc_full == + * __crc32c_le_combine(crc1, crc2, len2) when crc_full was + * seeded with the same initializer as crc1, and crc2 seed + * was 0. See also crc32c_combine_test(). + */ +extern u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2); + #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 19c19a5eee29..f6c82de12541 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -34,9 +34,9 @@ struct ipc_namespace { int sem_ctls[4]; int used_sems; - int msg_ctlmax; - int msg_ctlmnb; - int msg_ctlmni; + unsigned int msg_ctlmax; + unsigned int msg_ctlmnb; + unsigned int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; int auto_msgmni; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9ad0c18495ad..f6f59271f857 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,6 +54,7 @@ enum { MLX4_FLAG_MASTER = 1 << 2, MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, + MLX4_FLAG_OLD_REG_MAC = 1 << 6, }; enum { @@ -640,12 +641,23 @@ struct mlx4_counter { __be64 tx_bytes; }; +struct mlx4_quotas { + int qp; + int cq; + int srq; + int mpt; + int mtt; + int counter; + int xrcd; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; struct mlx4_phys_caps phys_caps; + struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; @@ -771,6 +783,12 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_MASTER; } +static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) +{ + return dev->phys_caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev); +} + static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + @@ -1078,7 +1096,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb1d918ecdf1..e6353cafbf05 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1800,6 +1800,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); +void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7967516adc0d..c7174b816674 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -316,6 +316,16 @@ ip_set_init_counter(struct ip_set_counter *counter, atomic64_set(&(counter)->packets, (long long)(ext->packets)); } +/* Netlink CB args */ +enum { + IPSET_CB_NET = 0, + IPSET_CB_DUMP, + IPSET_CB_INDEX, + IPSET_CB_ARG0, + IPSET_CB_ARG1, + IPSET_CB_ARG2, +}; + /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index f3c7c24bec1c..fbfdb9d8d3a7 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -24,7 +24,8 @@ struct netpoll { struct net_device *dev; char dev_name[IFNAMSIZ]; const char *name; - void (*rx_hook)(struct netpoll *, int, char *, int); + void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, + int offset, int len); union inet_addr local_ip, remote_ip; bool ipv6; @@ -41,7 +42,7 @@ struct netpoll_info { unsigned long rx_flags; spinlock_t rx_lock; struct semaphore dev_lock; - struct list_head rx_np; /* netpolls that registered an rx_hook */ + struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ struct sk_buff_head txq; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cc88172c7d9a..c74088ab103b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -332,7 +332,7 @@ do { \ #endif #ifndef this_cpu_sub -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(val)) +# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef this_cpu_inc @@ -418,7 +418,7 @@ do { \ # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) #endif -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) @@ -586,7 +586,7 @@ do { \ #endif #ifndef __this_cpu_sub -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(val)) +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef __this_cpu_inc @@ -668,7 +668,7 @@ do { \ __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(val)) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2c154976394b..44727b5d4981 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2360,8 +2360,6 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, @@ -2373,9 +2371,18 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); - struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct skb_checksum_ops { + __wsum (*update)(const void *mem, int len, __wsum wsum); + __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); +}; + +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops); +__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index cc25b70af33c..2300f7492927 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -36,6 +36,9 @@ * SUCH DAMAGE. */ +#ifndef __LINUX_USB_CDC_NCM_H +#define __LINUX_USB_CDC_NCM_H + #define CDC_NCM_COMM_ALTSETTING_NCM 0 #define CDC_NCM_COMM_ALTSETTING_MBIM 1 @@ -85,22 +88,13 @@ #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) struct cdc_ncm_ctx { - struct usb_cdc_ncm_ntb_parameters ncm_parm; struct hrtimer tx_timer; struct tasklet_struct bh; const struct usb_cdc_ncm_desc *func_desc; - const struct usb_cdc_mbim_desc *mbim_desc; - const struct usb_cdc_header_desc *header_desc; - const struct usb_cdc_union_desc *union_desc; + const struct usb_cdc_mbim_desc *mbim_desc; const struct usb_cdc_ether_desc *ether_desc; - struct net_device *netdev; - struct usb_device *udev; - struct usb_host_endpoint *in_ep; - struct usb_host_endpoint *out_ep; - struct usb_host_endpoint *status_ep; - struct usb_interface *intf; struct usb_interface *control; struct usb_interface *data; @@ -113,8 +107,6 @@ struct cdc_ncm_ctx { u32 tx_timer_pending; u32 tx_curr_frame_num; - u32 rx_speed; - u32 tx_speed; u32 rx_max; u32 tx_max; u32 max_datagram_size; @@ -127,9 +119,11 @@ struct cdc_ncm_ctx { u16 connected; }; -extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); -extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); -extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); -extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); -extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); +u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); +struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); +int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); +int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); + +#endif /* __LINUX_USB_CDC_NCM_H */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 8f59ca50477c..37a0e24adbe7 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -79,6 +79,12 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) } static inline __wsum +csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) +{ + return csum_block_add(csum, csum2, offset); +} + +static inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { u32 sum = (__force u32)csum2; @@ -92,6 +98,11 @@ static inline __wsum csum_unfold(__sum16 n) return (__force __wsum)n; } +static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) +{ + return csum_partial(buff, len, sum); +} + #define CSUM_MANGLED_0 ((__force __sum16)0xffff) static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) diff --git a/include/net/esp.h b/include/net/esp.h index 1356dda00d22..c92213c38312 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -3,16 +3,6 @@ #include <linux/skbuff.h> -struct crypto_aead; - -struct esp_data { - /* 0..255 */ - int padlen; - - /* Confidentiality & Integrity */ - struct crypto_aead *aead; -}; - void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); struct ip_esp_hdr; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6738f3409a6f..2182525e4d74 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) static inline void rt6_clean_expires(struct rt6_info *rt) { rt->rt6i_flags &= ~RTF_EXPIRES; + rt->dst.expires = 0; } static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1c2e1b9f6b86..cd7275f9c463 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1442,6 +1442,12 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest) atomic_dec(&dest->refcnt); } +static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) +{ + if (atomic_dec_return(&dest->refcnt) < 0) + kfree(dest); +} + /* * IPVS sync daemon data and function prototypes * (from ip_vs_sync.c) diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index fef44edf49c1..79d8d16732b4 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -19,17 +19,21 @@ struct nf_conn_counter { atomic64_t bytes; }; +struct nf_conn_acct { + struct nf_conn_counter counter[IP_CT_DIR_MAX]; +}; + static inline -struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) +struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); } static inline -struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) +struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { struct net *net = nf_ct_net(ct); - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; if (!net->ct.sysctl_acct) return NULL; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 86372ae0ee84..956b175523ff 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -36,7 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj -#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter +#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 259924d63ba6..6bd44fe94c26 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -42,56 +42,38 @@ #include <linux/types.h> #include <net/sctp/sctp.h> #include <linux/crc32c.h> +#include <linux/crc32.h> -static inline __u32 sctp_crc32c(__u32 crc, u8 *buffer, u16 length) +static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) { - return crc32c(crc, buffer, length); -} - -static inline __u32 sctp_start_cksum(__u8 *buffer, __u16 length) -{ - __u32 crc = ~(__u32)0; - __u8 zero[sizeof(__u32)] = {0}; - - /* Optimize this routine to be SCTP specific, knowing how - * to skip the checksum field of the SCTP header. + /* This uses the crypto implementation of crc32c, which is either + * implemented w/ hardware support or resolves to __crc32c_le(). */ - - /* Calculate CRC up to the checksum. */ - crc = sctp_crc32c(crc, buffer, sizeof(struct sctphdr) - sizeof(__u32)); - - /* Skip checksum field of the header. */ - crc = sctp_crc32c(crc, zero, sizeof(__u32)); - - /* Calculate the rest of the CRC. */ - crc = sctp_crc32c(crc, &buffer[sizeof(struct sctphdr)], - length - sizeof(struct sctphdr)); - return crc; -} - -static inline __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32) -{ - return sctp_crc32c(crc32, buffer, length); + return crc32c(sum, buff, len); } -static inline __le32 sctp_end_cksum(__u32 crc32) +static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, + int offset, int len) { - return cpu_to_le32(~crc32); + return __crc32c_le_combine(csum, csum2, len); } -/* Calculate the CRC32C checksum of an SCTP packet. */ static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - const struct sk_buff *iter; + struct sctphdr *sh = sctp_hdr(skb); + __le32 ret, old = sh->checksum; + const struct skb_checksum_ops ops = { + .update = sctp_csum_update, + .combine = sctp_csum_combine, + }; - __u32 crc32 = sctp_start_cksum(skb->data + offset, - skb_headlen(skb) - offset); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((__u8 *) iter->data, - skb_headlen(iter), crc32); + sh->checksum = 0; + ret = cpu_to_le32(~__skb_checksum(skb, offset, skb->len - offset, + ~(__u32)0, &ops)); + sh->checksum = old; - return sctp_end_cksum(crc32); + return ret; } #endif /* __sctp_checksum_h__ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2d7b4bdc972f..70e55d200610 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -275,7 +275,6 @@ extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; @@ -797,7 +796,7 @@ struct tcp_congestion_ops { /* lower bound for congestion window (optional) */ u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -824,12 +823,12 @@ void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); int tcp_set_congestion_control(struct sock *sk, const char *name); -void tcp_slow_start(struct tcp_sock *tp); +int tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); extern struct tcp_congestion_ops tcp_init_congestion_ops; u32 tcp_reno_ssthresh(struct sock *sk); -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight); +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight); u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; diff --git a/include/trace/events/target.h b/include/trace/events/target.h index aef8fc354025..da9cc0f05c93 100644 --- a/include/trace/events/target.h +++ b/include/trace/events/target.h @@ -144,7 +144,7 @@ TRACE_EVENT(target_sequencer_start, ), TP_fast_assign( - __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->unpacked_lun = cmd->orig_fe_lun; __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; @@ -182,7 +182,7 @@ TRACE_EVENT(target_cmd_complete, ), TP_fast_assign( - __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->unpacked_lun = cmd->orig_fe_lun; __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; diff --git a/include/uapi/linux/hsr_netlink.h b/include/uapi/linux/hsr_netlink.h new file mode 100644 index 000000000000..2475cb8a53af --- /dev/null +++ b/include/uapi/linux/hsr_netlink.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __UAPI_HSR_NETLINK_H +#define __UAPI_HSR_NETLINK_H + +/* Generic Netlink HSR family definition + */ + +/* attributes */ +enum { + HSR_A_UNSPEC, + HSR_A_NODE_ADDR, + HSR_A_IFINDEX, + HSR_A_IF1_AGE, + HSR_A_IF2_AGE, + HSR_A_NODE_ADDR_B, + HSR_A_IF1_SEQ, + HSR_A_IF2_SEQ, + HSR_A_IF1_IFINDEX, + HSR_A_IF2_IFINDEX, + HSR_A_ADDR_B_IFINDEX, + __HSR_A_MAX, +}; +#define HSR_A_MAX (__HSR_A_MAX - 1) + + +/* commands */ +enum { + HSR_C_UNSPEC, + HSR_C_RING_ERROR, + HSR_C_NODE_DOWN, + HSR_C_GET_NODE_STATUS, + HSR_C_SET_NODE_STATUS, + HSR_C_GET_NODE_LIST, + HSR_C_SET_NODE_LIST, + __HSR_C_MAX, +}; +#define HSR_C_MAX (__HSR_C_MAX - 1) + +#endif /* __UAPI_HSR_NETLINK_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index ade07f1c491a..2ce0f6a78fa5 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -85,6 +85,7 @@ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8a1e346243b7..b78566f59aba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -481,4 +481,17 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + +/* HSR section */ + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 29458223d044..fbcffe8041f7 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -334,7 +334,7 @@ enum { __IPVS_CMD_ATTR_MAX, }; -#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) +#define IPVS_CMD_ATTR_MAX (__IPVS_CMD_ATTR_MAX - 1) /* * Attributes used to describe a service diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a74d375b439b..d120f9fe0017 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -63,15 +63,18 @@ enum ovs_datapath_cmd { * not be sent. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. + * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the + * datapath. Always present in notifications. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. */ enum ovs_datapath_attr { OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ __OVS_DP_ATTR_MAX }; @@ -84,6 +87,14 @@ struct ovs_dp_stats { __u64 n_flows; /* Number of flows present */ }; +struct ovs_dp_megaflow_stats { + __u64 n_mask_hit; /* Number of masks used for flow lookups. */ + __u32 n_masks; /* Number of masks for the datapath. */ + __u32 pad0; /* Pad for future expension. */ + __u64 pad1; /* Pad for future expension. */ + __u64 pad2; /* Pad for future expension. */ +}; + struct ovs_vport_stats { __u64 rx_packets; /* total packets received */ __u64 tx_packets; /* total packets transmitted */ @@ -260,6 +271,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ + OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655a5d35..2fc1602e23bb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -456,13 +456,15 @@ struct perf_event_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_event_wakeup(). + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfece27ac..b0e99deb6d05 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, +static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipc_dointvec NULL #define proc_ipc_dointvec_minmax NULL #define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL +#define proc_ipc_callback_dointvec_minmax NULL #define proc_ipcauto_dointvec_minmax NULL #endif static int zero; static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE static int int_max = INT_MAX; -#endif static struct ctl_table ipc_kern_table[] = { { @@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_callback_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "sem", diff --git a/kernel/events/core.c b/kernel/events/core.c index d49a9d29334c..953c14348375 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6767,6 +6767,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (ret) return -EFAULT; + /* disabled for now */ + if (attr->mmap2) + return -EINVAL; + if (attr->__reserved_1) return -EINVAL; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index cd55144270b5..9c2ddfbf4525 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -87,10 +87,31 @@ again: goto out; /* - * Publish the known good head. Rely on the full barrier implied - * by atomic_dec_and_test() order the rb->head read and this - * write. + * Since the mmap() consumer (userspace) can run on a different CPU: + * + * kernel user + * + * READ ->data_tail READ ->data_head + * smp_mb() (A) smp_rmb() (C) + * WRITE $data READ $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head WRITE ->data_tail + * + * Where A pairs with D, and B pairs with C. + * + * I don't think A needs to be a full barrier because we won't in fact + * write data until we see the store from userspace. So we simply don't + * issue the data WRITE until we observe it. Be conservative for now. + * + * OTOH, D needs to be a full barrier since it separates the data READ + * from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, and for C + * an RMB is sufficient since it separates two READs. + * + * See perf_output_begin(). */ + smp_wmb(); rb->user_page->data_head = head; /* @@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, * Userspace could choose to issue a mb() before updating the * tail pointer. So that all reads will be completed before the * write is issued. + * + * See perf_output_put_handle(). */ tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_rmb(); + smp_mb(); offset = head = local_read(&rb->head); head += size; if (unlikely(!perf_output_space(rb, tail, offset, head))) diff --git a/kernel/mutex.c b/kernel/mutex.c index 6d647aedffea..d24105b1b794 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -410,7 +410,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, static __always_inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -450,7 +450,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct task_struct *owner; struct mspin_node node; - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -480,7 +480,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if ((atomic_read(&lock->count) == 1) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -551,7 +551,7 @@ slowpath: goto err; } - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { ret = __mutex_lock_check_stamp(lock, ww_ctx); if (ret) goto err; @@ -575,7 +575,7 @@ skip_wait: lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct mutex_waiter *cur; @@ -615,7 +615,7 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -625,7 +625,7 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL); + 0, nest, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); @@ -635,7 +635,7 @@ mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); @@ -644,7 +644,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -682,7 +682,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -697,7 +697,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -809,28 +809,28 @@ __mutex_lock_slowpath(atomic_t *lock_count) struct mutex *lock = container_of(lock_count, struct mutex, count); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __mutex_lock_killable_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } static noinline int __sched @@ -838,7 +838,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } #endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c9c759d5a15c..0121dab83f43 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -846,7 +846,7 @@ static int software_resume(void) goto Finish; } -late_initcall(software_resume); +late_initcall_sync(software_resume); static const char * const hibernation_modes[] = { diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 38959c866789..662c5798a685 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -33,29 +33,64 @@ struct ce_unbind { int res; }; -/** - * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds - * @latch: value to convert - * @evt: pointer to clock event device descriptor - * - * Math helper, returns latch value converted to nanoseconds (bound checked) - */ -u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, + bool ismax) { u64 clc = (u64) latch << evt->shift; + u64 rnd; if (unlikely(!evt->mult)) { evt->mult = 1; WARN_ON(1); } + rnd = (u64) evt->mult - 1; + + /* + * Upper bound sanity check. If the backwards conversion is + * not equal latch, we know that the above shift overflowed. + */ + if ((clc >> evt->shift) != (u64)latch) + clc = ~0ULL; + + /* + * Scaled math oddities: + * + * For mult <= (1 << shift) we can safely add mult - 1 to + * prevent integer rounding loss. So the backwards conversion + * from nsec to device ticks will be correct. + * + * For mult > (1 << shift), i.e. device frequency is > 1GHz we + * need to be careful. Adding mult - 1 will result in a value + * which when converted back to device ticks can be larger + * than latch by up to (mult - 1) >> shift. For the min_delta + * calculation we still want to apply this in order to stay + * above the minimum device ticks limit. For the upper limit + * we would end up with a latch value larger than the upper + * limit of the device, so we omit the add to stay below the + * device upper boundary. + * + * Also omit the add if it would overflow the u64 boundary. + */ + if ((~0ULL - clc > rnd) && + (!ismax || evt->mult <= (1U << evt->shift))) + clc += rnd; do_div(clc, evt->mult); - if (clc < 1000) - clc = 1000; - if (clc > KTIME_MAX) - clc = KTIME_MAX; - return clc; + /* Deltas less than 1usec are pointless noise */ + return clc > 1000 ? clc : 1000; +} + +/** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds + * @latch: value to convert + * @evt: pointer to clock event device descriptor + * + * Math helper, returns latch value converted to nanoseconds (bound checked) + */ +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +{ + return cev_delta2ns(latch, evt, false); } EXPORT_SYMBOL_GPL(clockevent_delta2ns); @@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq) sec = 600; clockevents_calc_mult_shift(dev, freq, sec); - dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); - dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); + dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); + dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); } /** diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 06344d986eb9..094f3152ec2b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -983,7 +983,7 @@ config DEBUG_KOBJECT config DEBUG_KOBJECT_RELEASE bool "kobject release debugging" - depends on DEBUG_KERNEL + depends on DEBUG_OBJECTS_TIMERS help kobjects are reference counted objects. This means that their last reference count put is not predictable, and the kobject can diff --git a/lib/crc32.c b/lib/crc32.c index 410093dbe51c..70f00ca5ef1e 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -29,6 +29,7 @@ #include <linux/crc32.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/sched.h> #include "crc32defs.h" #if CRC_LE_BITS > 8 @@ -49,6 +50,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); +#define GF2_DIM 32 + +static u32 gf2_matrix_times(u32 *mat, u32 vec) +{ + u32 sum = 0; + + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + + return sum; +} + +static void gf2_matrix_square(u32 *square, u32 *mat) +{ + int i; + + for (i = 0; i < GF2_DIM; i++) + square[i] = gf2_matrix_times(mat, mat[i]); +} + #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ @@ -130,6 +155,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif +/* For conditions of distribution and use, see copyright notice in zlib.h */ +static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, + u32 polynomial) +{ + u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ + u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ + u32 row; + int i; + + if (len2 <= 0) + return crc1; + + /* Put operator for one zero bit in odd */ + odd[0] = polynomial; + row = 1; + for (i = 1; i < GF2_DIM; i++) { + odd[i] = row; + row <<= 1; + } + + gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ + gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ + + /* Apply len2 zeros to crc1 (first square will put the operator for one + * zero byte, eight zero bits, in even). + */ + do { + /* Apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + /* If no more bits set, then done */ + if (len2 == 0) + break; + /* Another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + } while (len2 != 0); + + crc1 ^= crc2; + return crc1; +} + /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II * CRC32/CRC32C @@ -200,8 +271,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif +u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); +} + +u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); +} EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_le_combine); EXPORT_SYMBOL(__crc32c_le); +EXPORT_SYMBOL(__crc32c_le_combine); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 @@ -795,206 +877,106 @@ static struct crc_test { u32 crc32c_le; /* expected crc32c_le result */ } test[] = { - {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, - 0xf6e93d6c}, - {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, - 0x0fe92aca}, - {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, - 0x52e1ebb8}, - {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, - 0x0798af9a}, - {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, - 0x18eb3152}, - {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, - 0xd00d08c7}, - {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, - 0x8ba966bc}, - {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, - 0x11d694a2}, - {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, - 0x6ab3208d}, - {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, - 0xba4603c5}, - {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, - 0xe6071c6f}, - {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, - 0x179ec30a}, - {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, - 0x0903beb8}, - {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, - 0x6a7cb4fa}, - {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, - 0xdb535801}, - {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, - 0x92bed597}, - {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, - 0x192a3f1b}, - {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, - 0xccbaec1a}, - {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, - 0x7eabae4d}, - {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, - 0x28c72982}, - {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, - 0xc3cd4d18}, - {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, - 0xbca8f0e7}, - {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, - 0x713f60b3}, - {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, - 0xebd08fd5}, - {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, - 0x64406c59}, - {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, - 0x7421890e}, - {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, - 0xe9347603}, - {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, - 0x1bef9060}, - {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, - 0x34720072}, - {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, - 0x48310f59}, - {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, - 0x783a4213}, - {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, - 0x9e8efd41}, - {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, - 0xfc3d34a5}, - {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, - 0x17a52ae2}, - {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, - 0x886d935a}, - {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, - 0xeaaeaeb2}, - {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, - 0x8e900a4b}, - {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, - 0xd74662b1}, - {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, - 0xd26752ba}, - {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, - 0x8b1fcd62}, - {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, - 0xf54342fe}, - {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, - 0x5b95b988}, - {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, - 0x2e1176be}, - {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, - 0x66120546}, - {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, - 0xf256a5cc}, - {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, - 0x4af1dd69}, - {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, - 0x56f0a04a}, - {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, - 0x74f6b6b2}, - {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, - 0x085951fd}, - {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, - 0xc65387eb}, - {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, - 0x1ca9257b}, - {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, - 0xfd196d76}, - {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, - 0x5ef88339}, - {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, - 0x2c3714d9}, - {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, - 0x58576548}, - {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, - 0xfd7c57de}, - {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, - 0xd5fedd59}, - {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, - 0x1cc3b17b}, - {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, - 0x270eed73}, - {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, - 0x91ecbb11}, - {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, - 0x05ed8d0c}, - {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, - 0x0b09ad5b}, - {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, - 0xf8d511fb}, - {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, - 0x5ad832cc}, - {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, - 0x1214d196}, - {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, - 0x5747218a}, - {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, - 0xde8f14de}, - {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, - 0x3563b7b9}, - {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, - 0x071475d0}, - {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, - 0x54c79d60}, - {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, - 0x4c53eee6}, - {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, - 0x10137a3c}, - {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, - 0xaa9d6c73}, - {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, - 0xb63d23e7}, - {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, - 0x7f53e9cf}, - {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, - 0x13c1cd83}, - {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, - 0x49ff5867}, - {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, - 0x8467f211}, - {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, - 0x3f9683b2}, - {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, - 0x76a3f874}, - {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, - 0x863b702f}, - {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, - 0xdc6c58ff}, - {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, - 0x0622cc95}, - {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, - 0xe85605cd}, - {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, - 0x31da5f06}, - {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, - 0xa1f2e784}, - {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, - 0xb07cc616}, - {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, - 0xbf943b6c}, - {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, - 0x2c01af1c}, - {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, - 0x0fe5f56d}, - {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, - 0xf8943b2d}, - {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, - 0xe4d89272}, - {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, - 0x7c2f6bbb}, - {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, - 0xabbf388b}, - {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, - 0x1dca1f4e}, - {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, - 0x5c170e23}, - {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, - 0xc0e9d672}, - {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, - 0xc18bdc86}, - {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, - 0xa874fcdd}, - {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, - 0x9dc0bb48}, + {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, + {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, + {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8}, + {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a}, + {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152}, + {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7}, + {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc}, + {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2}, + {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d}, + {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5}, + {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f}, + {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a}, + {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8}, + {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa}, + {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801}, + {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597}, + {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b}, + {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a}, + {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d}, + {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982}, + {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18}, + {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7}, + {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3}, + {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5}, + {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59}, + {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e}, + {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603}, + {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060}, + {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072}, + {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59}, + {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213}, + {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41}, + {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5}, + {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2}, + {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a}, + {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2}, + {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b}, + {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1}, + {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba}, + {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62}, + {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe}, + {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988}, + {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be}, + {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546}, + {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc}, + {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69}, + {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a}, + {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2}, + {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd}, + {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb}, + {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b}, + {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76}, + {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339}, + {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9}, + {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548}, + {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de}, + {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59}, + {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b}, + {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73}, + {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11}, + {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c}, + {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b}, + {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb}, + {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc}, + {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196}, + {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a}, + {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de}, + {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9}, + {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0}, + {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60}, + {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6}, + {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c}, + {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73}, + {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7}, + {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf}, + {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83}, + {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867}, + {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211}, + {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2}, + {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874}, + {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f}, + {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff}, + {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95}, + {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd}, + {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06}, + {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784}, + {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616}, + {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c}, + {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c}, + {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d}, + {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d}, + {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272}, + {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb}, + {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b}, + {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e}, + {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23}, + {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672}, + {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86}, + {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd}, + {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48}, }; #include <linux/time.h> @@ -1050,6 +1032,41 @@ static int __init crc32c_test(void) return 0; } +static int __init crc32c_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = __crc32c_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = __crc32c_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc32c_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32c_combine: %d self tests passed\n", runs); + + return 0; +} + static int __init crc32_test(void) { int i; @@ -1109,10 +1126,49 @@ static int __init crc32_test(void) return 0; } +static int __init crc32_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = crc32_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = crc32_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = crc32_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == crc32_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32_combine: %d self tests passed\n", runs); + + return 0; +} + static int __init crc32test_init(void) { crc32_test(); crc32c_test(); + + crc32_combine_test(); + crc32c_combine_test(); + return 0; } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a685c8a79578..d16fa295ae1d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter) miter->__offset += miter->consumed; miter->__remaining -= miter->consumed; - if (miter->__flags & SG_MITER_TO_SG) + if ((miter->__flags & SG_MITER_TO_SG) && + !PageSlab(miter->page)) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 610e3df2768a..cca80d96e509 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1278,64 +1278,90 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; + int page_nid = -1, this_nid = numa_node_id(); int target_nid; - int current_nid = -1; - bool migrated; + bool page_locked; + bool migrated = false; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); - get_page(page); - current_nid = page_to_nid(page); + page_nid = page_to_nid(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == this_nid) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + /* + * Acquire the page lock to serialise THP migrations but avoid dropping + * page_table_lock if at all possible + */ + page_locked = trylock_page(page); target_nid = mpol_misplaced(page, vma, haddr); if (target_nid == -1) { - put_page(page); - goto clear_pmdnuma; + /* If the page was locked, there are no parallel migrations */ + if (page_locked) + goto clear_pmdnuma; + + /* + * Otherwise wait for potential migrations and retry. We do + * relock and check_same as the page may no longer be mapped. + * As the fault is being retried, do not account for it. + */ + spin_unlock(&mm->page_table_lock); + wait_on_page_locked(page); + page_nid = -1; + goto out; } - /* Acquire the page lock to serialise THP migrations */ + /* Page is misplaced, serialise migrations and parallel THP splits */ + get_page(page); spin_unlock(&mm->page_table_lock); - lock_page(page); + if (!page_locked) + lock_page(page); + anon_vma = page_lock_anon_vma_read(page); /* Confirm the PTE did not while locked */ spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); put_page(page); + page_nid = -1; goto out_unlock; } - spin_unlock(&mm->page_table_lock); - /* Migrate the THP to the requested node */ + /* + * Migrate the THP to the requested node, returns with page unlocked + * and pmd_numa cleared. + */ + spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); - if (!migrated) - goto check_same; - - task_numa_fault(target_nid, HPAGE_PMD_NR, true); - return 0; + if (migrated) + page_nid = target_nid; -check_same: - spin_lock(&mm->page_table_lock); - if (unlikely(!pmd_same(pmd, *pmdp))) - goto out_unlock; + goto out; clear_pmdnuma: + BUG_ON(!PageLocked(page)); pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); VM_BUG_ON(pmd_numa(*pmdp)); update_mmu_cache_pmd(vma, addr, pmdp); + unlock_page(page); out_unlock: spin_unlock(&mm->page_table_lock); - if (current_nid != -1) - task_numa_fault(current_nid, HPAGE_PMD_NR, false); + +out: + if (anon_vma) + page_unlock_anon_vma_read(anon_vma); + + if (page_nid != -1) + task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); + return 0; } diff --git a/mm/list_lru.c b/mm/list_lru.c index 72467914b856..72f9decb0104 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -81,8 +81,9 @@ restart: * decrement nr_to_walk first so that we don't livelock if we * get stuck on large numbesr of LRU_RETRY items */ - if (--(*nr_to_walk) == 0) + if (!*nr_to_walk) break; + --*nr_to_walk; ret = isolate(item, &nlru->lock, cb_arg); switch (ret) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9c9c685e4ddc..665dcd7abfff 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -54,6 +54,7 @@ #include <linux/page_cgroup.h> #include <linux/cpu.h> #include <linux/oom.h> +#include <linux/lockdep.h> #include "internal.h" #include <net/sock.h> #include <net/ip.h> @@ -2046,6 +2047,12 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, return total; } +#ifdef CONFIG_LOCKDEP +static struct lockdep_map memcg_oom_lock_dep_map = { + .name = "memcg_oom_lock", +}; +#endif + static DEFINE_SPINLOCK(memcg_oom_lock); /* @@ -2083,7 +2090,8 @@ static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) } iter->oom_lock = false; } - } + } else + mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_); spin_unlock(&memcg_oom_lock); @@ -2095,6 +2103,7 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg) struct mem_cgroup *iter; spin_lock(&memcg_oom_lock); + mutex_release(&memcg_oom_lock_dep_map, 1, _RET_IP_); for_each_mem_cgroup_tree(iter, memcg) iter->oom_lock = false; spin_unlock(&memcg_oom_lock); @@ -2765,10 +2774,10 @@ done: *ptr = memcg; return 0; nomem: - *ptr = NULL; - if (gfp_mask & __GFP_NOFAIL) - return 0; - return -ENOMEM; + if (!(gfp_mask & __GFP_NOFAIL)) { + *ptr = NULL; + return -ENOMEM; + } bypass: *ptr = root_mem_cgroup; return -EINTR; @@ -3773,8 +3782,7 @@ void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, { /* Update stat data for mem_cgroup */ preempt_disable(); - WARN_ON_ONCE(from->stat->count[idx] < nr_pages); - __this_cpu_add(from->stat->count[idx], -nr_pages); + __this_cpu_sub(from->stat->count[idx], nr_pages); __this_cpu_add(to->stat->count[idx], nr_pages); preempt_enable(); } @@ -4950,31 +4958,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) } while (usage > 0); } -/* - * This mainly exists for tests during the setting of set of use_hierarchy. - * Since this is the very setting we are changing, the current hierarchy value - * is meaningless - */ -static inline bool __memcg_has_children(struct mem_cgroup *memcg) -{ - struct cgroup_subsys_state *pos; - - /* bounce at first found */ - css_for_each_child(pos, &memcg->css) - return true; - return false; -} - -/* - * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed - * to be already dead (as in mem_cgroup_force_empty, for instance). This is - * from mem_cgroup_count_children(), in the sense that we don't really care how - * many children we have; we only need to know if we have any. It also counts - * any memcg without hierarchy as infertile. - */ static inline bool memcg_has_children(struct mem_cgroup *memcg) { - return memcg->use_hierarchy && __memcg_has_children(memcg); + lockdep_assert_held(&memcg_create_mutex); + /* + * The lock does not prevent addition or deletion to the list + * of children, but it prevents a new child from being + * initialized based on this parent in css_online(), so it's + * enough to decide whether hierarchically inherited + * attributes can still be changed or not. + */ + return memcg->use_hierarchy && + !list_empty(&memcg->css.cgroup->children); } /* @@ -5054,7 +5049,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, */ if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { - if (!__memcg_has_children(memcg)) + if (list_empty(&memcg->css.cgroup->children)) memcg->use_hierarchy = val; else retval = -EBUSY; diff --git a/mm/memory.c b/mm/memory.c index 1311f26497e6..d176154c243f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, } int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, - unsigned long addr, int current_nid) + unsigned long addr, int page_nid) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); return mpol_misplaced(page, vma, addr); @@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page = NULL; spinlock_t *ptl; - int current_nid = -1; + int page_nid = -1; int target_nid; bool migrated = false; @@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } - current_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, addr, current_nid); + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { - /* - * Account for the fault against the current node if it not - * being replaced regardless of where the page is located. - */ - current_nid = numa_node_id(); put_page(page); goto out; } @@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migrate to the requested node */ migrated = migrate_misplaced_page(page, target_nid); if (migrated) - current_nid = target_nid; + page_nid = target_nid; out: - if (current_nid != -1) - task_numa_fault(current_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); return 0; } @@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; - int local_nid = numa_node_id(); spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t pteval = *pte; struct page *page; - int curr_nid = local_nid; + int page_nid = -1; int target_nid; - bool migrated; + bool migrated = false; + if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(page_mapcount(page) != 1)) continue; - /* - * Note that the NUMA fault is later accounted to either - * the node that is currently running or where the page is - * migrated to. - */ - curr_nid = local_nid; - target_nid = numa_migrate_prep(page, vma, addr, - page_to_nid(page)); - if (target_nid == -1) { + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); + pte_unmap_unlock(pte, ptl); + if (target_nid != -1) { + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) + page_nid = target_nid; + } else { put_page(page); - continue; } - /* Migrate to the requested node */ - pte_unmap_unlock(pte, ptl); - migrated = migrate_misplaced_page(page, target_nid); - if (migrated) - curr_nid = target_nid; - task_numa_fault(curr_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } diff --git a/mm/migrate.c b/mm/migrate.c index 7a7325ee1d08..c04692774e88 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unlock_page(new_page); put_page(new_page); /* Free it */ - unlock_page(page); + /* Retake the callers reference and putback on LRU */ + get_page(page); putback_lru_page(page); - - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); - isolated = 0; - goto out; + mod_zone_page_state(page_zone(page), + NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); + goto out_fail; } /* @@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = pmd_mkhuge(entry); - page_add_new_anon_rmap(new_page, vma, haddr); - + pmdp_clear_flush(vma, haddr, pmd); set_pmd_at(mm, haddr, pmd, entry); + page_add_new_anon_rmap(new_page, vma, haddr); update_mmu_cache_pmd(vma, address, &entry); page_remove_rmap(page); /* @@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); -out: mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); @@ -1767,6 +1766,10 @@ out: out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); out_dropref: + entry = pmd_mknonnuma(entry); + set_pmd_at(mm, haddr, pmd, entry); + update_mmu_cache_pmd(vma, address, &entry); + unlock_page(page); put_page(page); return 0; diff --git a/mm/mprotect.c b/mm/mprotect.c index a3af058f68e4..412ba2b7326a 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,7 +148,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, split_huge_page_pmd(vma, addr, pmd); else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) { - pages += HPAGE_PMD_NR; + pages++; continue; } /* fall through */ diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 5da2cbcfdbb5..2beeabf502c5 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -242,7 +242,7 @@ int walk_page_range(unsigned long addr, unsigned long end, if (err) break; pgd++; - } while (addr = next, addr != end); + } while (addr = next, addr < end); return err; } diff --git a/net/Kconfig b/net/Kconfig index b50dacc072f0..0715db64a5c3 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -220,6 +220,7 @@ source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" source "net/mpls/Kconfig" +source "net/hsr/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 9492e8cb64e9..8fa2f91517f1 100644 --- a/net/Makefile +++ b/net/Makefile @@ -71,3 +71,4 @@ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_NET_MPLS_GSO) += mpls/ +obj-$(CONFIG_HSR) += hsr/ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635da..e6b7fecb3af1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb, false); goto out; } - if (br_multicast_rcv(br, NULL, skb)) { + if (br_multicast_rcv(br, NULL, skb, vid)) { kfree_skb(skb); goto out; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a2fd37ec35f7..7e73c32e205d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb)) + br_multicast_rcv(br, p, skb, vid)) goto drop; if (p->state == BR_STATE_LEARNING) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0513ef3ce667..4c214b2b88ef 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -947,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -957,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; - br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1005,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct icmp6hdr *icmp6h; struct mld2_grec *grec; @@ -1013,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; - br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -1141,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br, static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -1153,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1189,7 +1187,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1219,7 +1216,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; @@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1265,7 +1262,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1439,7 +1435,8 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2 = skb; const struct iphdr *iph; @@ -1447,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; - u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1508,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1519,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); @@ -1540,7 +1535,8 @@ err_out: #if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2; const struct ipv6hdr *ip6h; @@ -1550,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1640,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1657,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2); + err = br_ip6_multicast_mld2_report(br, port, skb2, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2); + err = br_ip6_multicast_query(br, port, skb2, vid); break; case ICMPV6_MGM_REDUCTION: { @@ -1681,7 +1675,7 @@ out: #endif int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1691,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb); + return br_multicast_ipv4_rcv(br, port, skb, vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb); + return br_multicast_ipv6_rcv(br, port, skb, vid); #endif } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 878f008afefa..80cad2cf02a7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -559,6 +559,8 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) else if (skb->protocol == htons(ETH_P_PPP_SES)) nf_bridge->mask |= BRNF_PPPoE; + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); return skb->dev; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d1ca6d956633..229d820bdf0b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -435,7 +435,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING extern unsigned int br_mdb_rehash_seq; int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb); + struct sk_buff *skb, u16 vid); struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb, u16 vid); void br_multicast_add_port(struct net_bridge_port *port); @@ -504,7 +504,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { return 0; } diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 68f8128147be..5ca74a0e595f 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -3,6 +3,7 @@ # # config NF_TABLES_BRIDGE + depends on NF_TABLES tristate "Ethernet Bridge nf_tables support" menuconfig BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 518093802d1d..7c470c371e14 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; @@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); - else - *(pm->prefix) = '\0'; if (in) { strcpy(pm->physindev, in->name); @@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); - } else - pm->indev[0] = pm->physindev[0] = '\0'; + } if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); /* rcu_read_lock()ed by nf_hook_slow */ strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } else - pm->outdev[0] = pm->physoutdev[0] = '\0'; + } if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) BUG(); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index e8cb016fa34d..cf54b22818c8 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -47,14 +48,50 @@ static struct pernet_operations nf_tables_bridge_net_ops = { .exit = nf_tables_bridge_exit_net, }; +static unsigned int +nft_do_chain_bridge(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nf_chain_type filter_bridge = { + .family = NFPROTO_BRIDGE, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_BR_LOCAL_IN) | + (1 << NF_BR_FORWARD) | + (1 << NF_BR_LOCAL_OUT), + .fn = { + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + }, +}; + static int __init nf_tables_bridge_init(void) { - return register_pernet_subsys(&nf_tables_bridge_net_ops); + int ret; + + nft_register_chain_type(&filter_bridge); + ret = register_pernet_subsys(&nf_tables_bridge_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_bridge); + + return ret; } static void __exit nf_tables_bridge_exit(void) { - return unregister_pernet_subsys(&nf_tables_bridge_net_ops); + unregister_pernet_subsys(&nf_tables_bridge_net_ops); + nft_unregister_chain_type(&filter_bridge); } module_init(nf_tables_bridge_init); diff --git a/net/core/dev.c b/net/core/dev.c index 0054c8c75f50..0e6136546a8c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6196,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); +void netdev_freemem(struct net_device *dev) +{ + char *addr = (char *)dev - dev->padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_p; + goto free_dev; if (dev_addr_init(dev)) goto free_pcpu; @@ -6301,8 +6313,8 @@ free_pcpu: kfree(dev->_rx); #endif -free_p: - kfree(p); +free_dev: + netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + netdev_freemem(dev); return; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5cac36e6ccd1..0242035192f1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -66,7 +66,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56b4e47..d03f2c9750fa 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + netdev_freemem(dev); } static const void *net_namespace(struct device *d) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc75c9e461b8..8f971990677c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address, we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb) int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { - int proto, len, ulen; - int hits = 0; + int proto, len, ulen, data_len; + int hits = 0, offset; const struct iphdr *iph; struct udphdr *uh; struct netpoll *np, *tmp; + uint16_t source; if (list_empty(&npinfo->rx_np)) goto out; @@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) len -= iph->ihl*4; uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != len) goto out; @@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } } else { @@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; uh = udp_hdr(skb); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != skb->len) goto out; if (udp6_csum_init(skb, uh, IPPROTO_UDP)) @@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } #endif @@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_hook) { + if (np->rx_skb_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; list_add_tail(&np->rx, &npinfo->rx_np); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0ab32faa520f..e4115597b38b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1928,9 +1928,8 @@ fault: EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ - -__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1941,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - csum = csum_partial(skb->data + offset, copy, csum); + csum = ops->update(skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; @@ -1962,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); + csum2 = ops->update(vaddr + frag->page_offset + + offset - start, copy, 0); kunmap_atomic(vaddr); - csum = csum_block_add(csum, csum2, pos); + csum = ops->combine(csum, csum2, pos, copy); if (!(len -= copy)) return csum; offset += copy; @@ -1984,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, __wsum csum2; if (copy > len) copy = len; - csum2 = skb_checksum(frag_iter, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); + csum2 = __skb_checksum(frag_iter, offset - start, + copy, 0, ops); + csum = ops->combine(csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; @@ -1998,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, return csum; } +EXPORT_SYMBOL(__skb_checksum); + +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) +{ + const struct skb_checksum_ops ops = { + .update = csum_partial_ext, + .combine = csum_block_add_ext, + }; + + return __skb_checksum(skb, offset, len, csum, &ops); +} EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig new file mode 100644 index 000000000000..0d3d709052ca --- /dev/null +++ b/net/hsr/Kconfig @@ -0,0 +1,27 @@ +# +# IEC 62439-3 High-availability Seamless Redundancy +# + +config HSR + tristate "High-availability Seamless Redundancy (HSR)" + ---help--- + If you say Y here, then your Linux box will be able to act as a + DANH ("Doubly attached node implementing HSR"). For this to work, + your Linux box needs (at least) two physical Ethernet interfaces, + and it must be connected as a node in a ring network together with + other HSR capable nodes. + + All Ethernet frames sent over the hsr device will be sent in both + directions on the ring (over both slave ports), giving a redundant, + instant fail-over network. Each HSR node in the ring acts like a + bridge for HSR frames, but filters frames that have been forwarded + earlier. + + This code is a "best effort" to comply with the HSR standard as + described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have + been made. + + You need to perform any and all necessary tests yourself before + relying on this code in a safety critical system! + + If unsure, say N. diff --git a/net/hsr/Makefile b/net/hsr/Makefile new file mode 100644 index 000000000000..b68359f181cc --- /dev/null +++ b/net/hsr/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for HSR +# + +obj-$(CONFIG_HSR) += hsr.o + +hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c new file mode 100644 index 000000000000..cac505f166d5 --- /dev/null +++ b/net/hsr/hsr_device.c @@ -0,0 +1,596 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * This file contains device methods for creating, using and destroying + * virtual HSR devices. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_sched.h> +#include "hsr_device.h" +#include "hsr_framereg.h" +#include "hsr_main.h" + + +static bool is_admin_up(struct net_device *dev) +{ + return dev && (dev->flags & IFF_UP); +} + +static bool is_slave_up(struct net_device *dev) +{ + return dev && is_admin_up(dev) && netif_oper_up(dev); +} + +static void __hsr_set_operstate(struct net_device *dev, int transition) +{ + write_lock_bh(&dev_base_lock); + if (dev->operstate != transition) { + dev->operstate = transition; + write_unlock_bh(&dev_base_lock); + netdev_state_change(dev); + } else { + write_unlock_bh(&dev_base_lock); + } +} + +void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2) +{ + if (!is_admin_up(hsr_dev)) { + __hsr_set_operstate(hsr_dev, IF_OPER_DOWN); + return; + } + + if (is_slave_up(slave1) || is_slave_up(slave2)) + __hsr_set_operstate(hsr_dev, IF_OPER_UP); + else + __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN); +} + +void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2) +{ + if (is_slave_up(slave1) || is_slave_up(slave2)) + netif_carrier_on(hsr_dev); + else + netif_carrier_off(hsr_dev); +} + + +void hsr_check_announce(struct net_device *hsr_dev, int old_operstate) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(hsr_dev); + + if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { + /* Went up */ + hsr_priv->announce_count = 0; + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + add_timer(&hsr_priv->announce_timer); + } + + if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) + /* Went down */ + del_timer(&hsr_priv->announce_timer); +} + + +int hsr_get_max_mtu(struct hsr_priv *hsr_priv) +{ + int mtu_max; + + if (hsr_priv->slave[0] && hsr_priv->slave[1]) + mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu); + else if (hsr_priv->slave[0]) + mtu_max = hsr_priv->slave[0]->mtu; + else if (hsr_priv->slave[1]) + mtu_max = hsr_priv->slave[1]->mtu; + else + mtu_max = HSR_TAGLEN; + + return mtu_max - HSR_TAGLEN; +} + +static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (new_mtu > hsr_get_max_mtu(hsr_priv)) { + netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", + HSR_TAGLEN); + return -EINVAL; + } + + dev->mtu = new_mtu; + + return 0; +} + +static int hsr_dev_open(struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + int i; + char *slave_name; + + hsr_priv = netdev_priv(dev); + + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (hsr_priv->slave[i]) + slave_name = hsr_priv->slave[i]->name; + else + slave_name = "null"; + + if (!is_slave_up(hsr_priv->slave[i])) + netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n", + 'A' + i, slave_name); + } + + return 0; +} + +static int hsr_dev_close(struct net_device *dev) +{ + /* Nothing to do here. We could try to restore the state of the slaves + * to what they were before being changed by the hsr master dev's state, + * but they might have been changed manually in the mean time too, so + * taking them up or down here might be confusing and is probably not a + * good idea. + */ + return 0; +} + + +static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv) +{ + unsigned long irqflags; + + /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values + * between 0001-1001 ("ring identifier", for regular HSR frames), + * or 1111 ("HSR management", supervision frames). Unfortunately, the + * spec writers forgot to explain what a "ring identifier" is, or + * how it is used. So we just set this to 0001 for regular frames, + * and 1111 for supervision frames. + */ + set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1); + + /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet + * frame is the content of the frame located between the Length/Type + * field and the Frame Check Sequence." + * + * IEC 62439-3, p 48, specifies the "original LPDU" to include the + * original "LT" field (what "LT" means is not explained anywhere as + * far as I can see - perhaps "Length/Type"?). So LSDU_size might + * equal original length + 2. + * Also, the fact that this field is not used anywhere (might be used + * by a RedBox connecting HSR and PRP nets?) means I cannot test its + * correctness. Instead of guessing, I set this to 0 here, to make any + * problems immediately apparent. Anyone using this driver with PRP/HSR + * RedBoxes might need to fix this... + */ + set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0); + + spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); + hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr); + hsr_priv->sequence_nr++; + spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + + hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; + + hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); +} + +static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv, + enum hsr_dev_idx dev_idx) +{ + struct hsr_ethhdr *hsr_ethhdr; + + hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + + skb->dev = hsr_priv->slave[dev_idx]; + + hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx); + + /* Address substitution (IEC62439-3 pp 26, 50): replace mac + * address of outgoing frame with that of the outgoing slave's. + */ + memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN); + + return dev_queue_xmit(skb); +} + + +static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + struct hsr_ethhdr *hsr_ethhdr; + struct sk_buff *skb2; + int res1, res2; + + hsr_priv = netdev_priv(dev); + hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + + if ((skb->protocol != htons(ETH_P_PRP)) || + (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) { + hsr_fill_tag(hsr_ethhdr, hsr_priv); + skb->protocol = htons(ETH_P_PRP); + } + + skb2 = pskb_copy(skb, GFP_ATOMIC); + + res1 = NET_XMIT_DROP; + if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A])) + res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A); + + res2 = NET_XMIT_DROP; + if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B])) + res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B); + + if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN || + res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) { + hsr_priv->dev->stats.tx_packets++; + hsr_priv->dev->stats.tx_bytes += skb->len; + } else { + hsr_priv->dev->stats.tx_dropped++; + } + + return NETDEV_TX_OK; +} + + +static int hsr_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + int res; + + /* Make room for the HSR tag now. We will fill it in later (in + * hsr_dev_xmit) + */ + if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN) + return -ENOBUFS; + skb_push(skb, HSR_TAGLEN); + + /* To allow VLAN/HSR combos we should probably use + * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); + * here instead. It would require other changes too, though - e.g. + * separate headers for each slave etc... + */ + res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); + if (res <= 0) + return res; + skb_reset_mac_header(skb); + + return res + HSR_TAGLEN; +} + + +static const struct header_ops hsr_header_ops = { + .create = hsr_header_create, + .parse = eth_header_parse, +}; + + +/* HSR:2010 supervision frames should be padded so that the whole frame, + * including headers and FCS, is 64 bytes (without VLAN). + */ +static int hsr_pad(int size) +{ + const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN; + + if (size >= min_size) + return size; + return min_size; +} + +static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type) +{ + struct hsr_priv *hsr_priv; + struct sk_buff *skb; + int hlen, tlen; + struct hsr_sup_tag *hsr_stag; + struct hsr_sup_payload *hsr_sp; + unsigned long irqflags; + + hlen = LL_RESERVED_SPACE(hsr_dev); + tlen = hsr_dev->needed_tailroom; + skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, + GFP_ATOMIC); + + if (skb == NULL) + return; + + hsr_priv = netdev_priv(hsr_dev); + + skb_reserve(skb, hlen); + + skb->dev = hsr_dev; + skb->protocol = htons(ETH_P_PRP); + skb->priority = TC_PRIO_CONTROL; + + if (dev_hard_header(skb, skb->dev, ETH_P_PRP, + hsr_priv->sup_multicast_addr, + skb->dev->dev_addr, skb->len) < 0) + goto out; + + skb_pull(skb, sizeof(struct ethhdr)); + hsr_stag = (typeof(hsr_stag)) skb->data; + + set_hsr_stag_path(hsr_stag, 0xf); + set_hsr_stag_HSR_Ver(hsr_stag, 0); + + spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); + hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr); + hsr_priv->sequence_nr++; + spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + + hsr_stag->HSR_TLV_Type = type; + hsr_stag->HSR_TLV_Length = 12; + + skb_push(skb, sizeof(struct ethhdr)); + + /* Payload: MacAddressA */ + hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); + memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN); + + dev_queue_xmit(skb); + return; + +out: + kfree_skb(skb); +} + + +/* Announce (supervision frame) timer function + */ +static void hsr_announce(unsigned long data) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = (struct hsr_priv *) data; + + if (hsr_priv->announce_count < 3) { + send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE); + hsr_priv->announce_count++; + } else { + send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK); + } + + if (hsr_priv->announce_count < 3) + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + else + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + + if (is_admin_up(hsr_priv->dev)) + add_timer(&hsr_priv->announce_timer); +} + + +static void restore_slaves(struct net_device *hsr_dev) +{ + struct hsr_priv *hsr_priv; + int i; + int res; + + hsr_priv = netdev_priv(hsr_dev); + + rtnl_lock(); + + /* Restore promiscuity */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (!hsr_priv->slave[i]) + continue; + res = dev_set_promiscuity(hsr_priv->slave[i], -1); + if (res) + netdev_info(hsr_dev, + "Cannot restore slave promiscuity (%s, %d)\n", + hsr_priv->slave[i]->name, res); + } + + rtnl_unlock(); +} + +static void reclaim_hsr_dev(struct rcu_head *rh) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = container_of(rh, struct hsr_priv, rcu_head); + free_netdev(hsr_priv->dev); +} + + +/* According to comments in the declaration of struct net_device, this function + * is "Called from unregister, can be used to call free_netdev". Ok then... + */ +static void hsr_dev_destroy(struct net_device *hsr_dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(hsr_dev); + + del_timer(&hsr_priv->announce_timer); + unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */ + restore_slaves(hsr_dev); + call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */ +} + +static const struct net_device_ops hsr_device_ops = { + .ndo_change_mtu = hsr_dev_change_mtu, + .ndo_open = hsr_dev_open, + .ndo_stop = hsr_dev_close, + .ndo_start_xmit = hsr_dev_xmit, +}; + + +void hsr_dev_setup(struct net_device *dev) +{ + random_ether_addr(dev->dev_addr); + + ether_setup(dev); + dev->header_ops = &hsr_header_ops; + dev->netdev_ops = &hsr_device_ops; + dev->tx_queue_len = 0; + + dev->destructor = hsr_dev_destroy; +} + + +/* Return true if dev is a HSR master; return false otherwise. + */ +bool is_hsr_master(struct net_device *dev) +{ + return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); +} + +static int check_slave_ok(struct net_device *dev) +{ + /* Don't allow HSR on non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || + (dev->addr_len != ETH_ALEN)) { + netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); + return -EINVAL; + } + + /* Don't allow enslaving hsr devices */ + if (is_hsr_master(dev)) { + netdev_info(dev, "Cannot create trees of HSR devices.\n"); + return -EINVAL; + } + + if (is_hsr_slave(dev)) { + netdev_info(dev, "This device is already a HSR slave.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); + return -EINVAL; + } + + /* HSR over bonded devices has not been tested, but I'm not sure it + * won't work... + */ + + return 0; +} + + +/* Default multicast address for HSR Supervision frames */ +static const unsigned char def_multicast_addr[ETH_ALEN] = { + 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 +}; + +int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], + unsigned char multicast_spec) +{ + struct hsr_priv *hsr_priv; + int i; + int res; + + hsr_priv = netdev_priv(hsr_dev); + hsr_priv->dev = hsr_dev; + INIT_LIST_HEAD(&hsr_priv->node_db); + INIT_LIST_HEAD(&hsr_priv->self_node_db); + for (i = 0; i < HSR_MAX_SLAVE; i++) + hsr_priv->slave[i] = slave[i]; + + spin_lock_init(&hsr_priv->seqnr_lock); + /* Overflow soon to find bugs easier: */ + hsr_priv->sequence_nr = USHRT_MAX - 1024; + + init_timer(&hsr_priv->announce_timer); + hsr_priv->announce_timer.function = hsr_announce; + hsr_priv->announce_timer.data = (unsigned long) hsr_priv; + + memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN); + hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + +/* FIXME: should I modify the value of these? + * + * - hsr_dev->flags - i.e. + * IFF_MASTER/SLAVE? + * - hsr_dev->priv_flags - i.e. + * IFF_EBRIDGE? + * IFF_TX_SKB_SHARING? + * IFF_HSR_MASTER/SLAVE? + */ + + for (i = 0; i < HSR_MAX_SLAVE; i++) { + res = check_slave_ok(slave[i]); + if (res) + return res; + } + + hsr_dev->features = slave[0]->features & slave[1]->features; + /* Prevent recursive tx locking */ + hsr_dev->features |= NETIF_F_LLTX; + /* VLAN on top of HSR needs testing and probably some work on + * hsr_header_create() etc. + */ + hsr_dev->features |= NETIF_F_VLAN_CHALLENGED; + + /* Set hsr_dev's MAC address to that of mac_slave1 */ + memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN); + + /* Set required header length */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (slave[i]->hard_header_len + HSR_TAGLEN > + hsr_dev->hard_header_len) + hsr_dev->hard_header_len = + slave[i]->hard_header_len + HSR_TAGLEN; + } + + /* MTU */ + for (i = 0; i < HSR_MAX_SLAVE; i++) + if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu) + hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN; + + /* Make sure the 1st call to netif_carrier_on() gets through */ + netif_carrier_off(hsr_dev); + + /* Promiscuity */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + res = dev_set_promiscuity(slave[i], 1); + if (res) { + netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n", + slave[i]->name, res); + goto fail; + } + } + + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr_priv->self_node_db, + hsr_dev->dev_addr, + hsr_priv->slave[1]->dev_addr); + if (res < 0) + goto fail; + + res = register_netdevice(hsr_dev); + if (res) + goto fail; + + register_hsr_master(hsr_priv); + + return 0; + +fail: + restore_slaves(hsr_dev); + return res; +} diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h new file mode 100644 index 000000000000..2c7148e73914 --- /dev/null +++ b/net/hsr/hsr_device.h @@ -0,0 +1,29 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __HSR_DEVICE_H +#define __HSR_DEVICE_H + +#include <linux/netdevice.h> +#include "hsr_main.h" + +void hsr_dev_setup(struct net_device *dev); +int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], + unsigned char multicast_spec); +void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2); +void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2); +void hsr_check_announce(struct net_device *hsr_dev, int old_operstate); +bool is_hsr_master(struct net_device *dev); +int hsr_get_max_mtu(struct hsr_priv *hsr_priv); + +#endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c new file mode 100644 index 000000000000..003f5bb3acd2 --- /dev/null +++ b/net/hsr/hsr_framereg.c @@ -0,0 +1,503 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * The HSR spec says never to forward the same frame twice on the same + * interface. A frame is identified by its source MAC address and its HSR + * sequence number. This code keeps track of senders and their sequence numbers + * to allow filtering of duplicate frames, and to detect HSR ring errors. + */ + +#include <linux/if_ether.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/rculist.h> +#include "hsr_main.h" +#include "hsr_framereg.h" +#include "hsr_netlink.h" + + +struct node_entry { + struct list_head mac_list; + unsigned char MacAddressA[ETH_ALEN]; + unsigned char MacAddressB[ETH_ALEN]; + enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB + * frames are received from this node + */ + unsigned long time_in[HSR_MAX_SLAVE]; + bool time_in_stale[HSR_MAX_SLAVE]; + u16 seq_out[HSR_MAX_DEV]; + struct rcu_head rcu_head; +}; + +/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ + + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +static struct node_entry *find_node_by_AddrA(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, addr)) + return node; + } + + return NULL; +} + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +static struct node_entry *find_node_by_AddrB(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressB, addr)) + return node; + } + + return NULL; +} + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb) +{ + struct node_entry *node; + struct ethhdr *ethhdr; + + if (!skb_mac_header_was_set(skb)) + return NULL; + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) + return node; + if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + return node; + } + + return NULL; +} + + +/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize + * frames from self that's been looped over the HSR ring. + */ +int hsr_create_self_node(struct list_head *self_node_db, + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]) +{ + struct node_entry *node, *oldnode; + + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(node->MacAddressA, addr_a, ETH_ALEN); + memcpy(node->MacAddressB, addr_b, ETH_ALEN); + + rcu_read_lock(); + oldnode = list_first_or_null_rcu(self_node_db, + struct node_entry, mac_list); + if (oldnode) { + list_replace_rcu(&oldnode->mac_list, &node->mac_list); + rcu_read_unlock(); + synchronize_rcu(); + kfree(oldnode); + } else { + rcu_read_unlock(); + list_add_tail_rcu(&node->mac_list, self_node_db); + } + + return 0; +} + +static void node_entry_reclaim(struct rcu_head *rh) +{ + kfree(container_of(rh, struct node_entry, rcu_head)); +} + + +/* Add/merge node to the database of nodes. 'skb' must contain an HSR + * supervision frame. + * - If the supervision header's MacAddressA field is not yet in the database, + * this frame is from an hitherto unknown node - add it to the database. + * - If the sender's MAC address is not the same as its MacAddressA address, + * the node is using PICS_SUBS (address substitution). Record the sender's + * address as the node's MacAddressB. + * + * This function needs to work even if the sender node has changed one of its + * slaves' MAC addresses. In this case, there are four different cases described + * by (Addr-changed, received-from) pairs as follows. Note that changing the + * SlaveA address is equal to changing the node's own address: + * + * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since + * node == NULL. + * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected + * from this frame). + * + * - (AddrA, SlaveB): The old node will be found. We need to detect this and + * remove the node. + * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first). + * The old one will be pruned after HSR_NODE_FORGET_TIME. + * + * We also need to detect if the sender's SlaveA and SlaveB cables have been + * swapped. + */ +struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, + struct node_entry *node, + struct sk_buff *skb, + enum hsr_dev_idx dev_idx) +{ + struct hsr_sup_payload *hsr_sp; + struct hsr_ethhdr_sp *hsr_ethsup; + int i; + unsigned long now; + + hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + hsr_sp = (struct hsr_sup_payload *) skb->data; + + if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { + /* Node has changed its AddrA, frame was received from SlaveB */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node && (dev_idx == node->AddrB_if) && + !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { + /* Cables have been swapped */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node && (dev_idx != node->AddrB_if) && + (node->AddrB_if != HSR_DEV_NONE) && + !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { + /* Cables have been swapped */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node) + return node; + + node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA); + if (node) { + /* Node is known, but frame was received from an unknown + * address. Node is PICS_SUBS capable; merge its AddrB. + */ + memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + node->AddrB_if = dev_idx; + return node; + } + + node = kzalloc(sizeof(*node), GFP_ATOMIC); + if (!node) + return NULL; + + memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN); + memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source)) + node->AddrB_if = dev_idx; + else + node->AddrB_if = HSR_DEV_NONE; + + /* We are only interested in time diffs here, so use current jiffies + * as initialization. (0 could trigger an spurious ring error warning). + */ + now = jiffies; + for (i = 0; i < HSR_MAX_SLAVE; i++) + node->time_in[i] = now; + for (i = 0; i < HSR_MAX_DEV; i++) + node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1; + + list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db); + + return node; +} + + +/* 'skb' is a frame meant for this host, that is to be passed to upper layers. + * + * If the frame was sent by a node's B interface, replace the sender + * address with that node's "official" address (MacAddressA) so that upper + * layers recognize where it came from. + */ +void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) +{ + struct ethhdr *ethhdr; + struct node_entry *node; + + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return; + } + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + rcu_read_lock(); + node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source); + if (node) + memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN); + rcu_read_unlock(); +} + + +/* 'skb' is a frame meant for another host. + * 'hsr_dev_idx' is the HSR index of the outgoing device + * + * Substitute the target (dest) MAC address if necessary, so the it matches the + * recipient interface MAC address, regardless of whether that is the + * recipient's A or B interface. + * This is needed to keep the packets flowing through switches that learn on + * which "side" the different interfaces are. + */ +void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, + enum hsr_dev_idx dev_idx) +{ + struct node_entry *node; + + rcu_read_lock(); + node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest); + if (node && (node->AddrB_if == dev_idx)) + memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN); + rcu_read_unlock(); +} + + +/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, + * false otherwise. + */ +static bool seq_nr_after(u16 a, u16 b) +{ + /* Remove inconsistency where + * seq_nr_after(a, b) == seq_nr_before(a, b) */ + if ((int) b - a == 32768) + return false; + + return (((s16) (b - a)) < 0); +} +#define seq_nr_before(a, b) seq_nr_after((b), (a)) +#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) +#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) + + +void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) +{ + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + return; + } + node->time_in[dev_idx] = jiffies; + node->time_in_stale[dev_idx] = false; +} + + +/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid + * ethhdr->h_source address and skb->mac_header set. + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise, or + * negative error code on error + */ +int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, + struct sk_buff *skb) +{ + struct hsr_ethhdr *hsr_ethhdr; + u16 sequence_nr; + + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + return -EINVAL; + } + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return -EINVAL; + } + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + + sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr); + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx])) + return 1; + + node->seq_out[dev_idx] = sequence_nr; + return 0; +} + + + +static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx) +{ + enum hsr_dev_idx other; + + if (node->time_in_stale[dev_idx]) + return true; + + if (dev_idx == HSR_DEV_SLAVE_A) + other = HSR_DEV_SLAVE_B; + else + other = HSR_DEV_SLAVE_A; + + if (node->time_in_stale[other]) + return false; + + if (time_after(node->time_in[other], node->time_in[dev_idx] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return true; + + return false; +} + + +/* Remove stale sequence_nr records. Called by timer every + * HSR_LIFE_CHECK_INTERVAL (two seconds or so). + */ +void hsr_prune_nodes(struct hsr_priv *hsr_priv) +{ + struct node_entry *node; + unsigned long timestamp; + unsigned long time_a, time_b; + + rcu_read_lock(); + list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) { + /* Shorthand */ + time_a = node->time_in[HSR_DEV_SLAVE_A]; + time_b = node->time_in[HSR_DEV_SLAVE_B]; + + /* Check for timestamps old enough to risk wrap-around */ + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2)) + node->time_in_stale[HSR_DEV_SLAVE_A] = true; + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2)) + node->time_in_stale[HSR_DEV_SLAVE_B] = true; + + /* Get age of newest frame from node. + * At least one time_in is OK here; nodes get pruned long + * before both time_ins can get stale + */ + timestamp = time_a; + if (node->time_in_stale[HSR_DEV_SLAVE_A] || + (!node->time_in_stale[HSR_DEV_SLAVE_B] && + time_after(time_b, time_a))) + timestamp = time_b; + + /* Warn of ring error only as long as we get frames at all */ + if (time_is_after_jiffies(timestamp + + msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) { + + if (is_late(node, HSR_DEV_SLAVE_A)) + hsr_nl_ringerror(hsr_priv, node->MacAddressA, + HSR_DEV_SLAVE_A); + else if (is_late(node, HSR_DEV_SLAVE_B)) + hsr_nl_ringerror(hsr_priv, node->MacAddressA, + HSR_DEV_SLAVE_B); + } + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr_priv, node->MacAddressA); + list_del_rcu(&node->mac_list); + /* Note that we need to free this entry later: */ + call_rcu(&node->rcu_head, node_entry_reclaim); + } + } + rcu_read_unlock(); +} + + +void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, + unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + if (!_pos) { + node = list_first_or_null_rcu(&hsr_priv->node_db, + struct node_entry, mac_list); + if (node) + memcpy(addr, node->MacAddressA, ETH_ALEN); + return node; + } + + node = _pos; + list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) { + memcpy(addr, node->MacAddressA, ETH_ALEN); + return node; + } + + return NULL; +} + + +int hsr_get_node_data(struct hsr_priv *hsr_priv, + const unsigned char *addr, + unsigned char addr_b[ETH_ALEN], + unsigned int *addr_b_ifindex, + int *if1_age, + u16 *if1_seq, + int *if2_age, + u16 *if2_seq) +{ + struct node_entry *node; + unsigned long tdiff; + + + rcu_read_lock(); + node = find_node_by_AddrA(&hsr_priv->node_db, addr); + if (!node) { + rcu_read_unlock(); + return -ENOENT; /* No such entry */ + } + + memcpy(addr_b, node->MacAddressB, ETH_ALEN); + + tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A]; + if (node->time_in_stale[HSR_DEV_SLAVE_A]) + *if1_age = INT_MAX; +#if HZ <= MSEC_PER_SEC + else if (tdiff > msecs_to_jiffies(INT_MAX)) + *if1_age = INT_MAX; +#endif + else + *if1_age = jiffies_to_msecs(tdiff); + + tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B]; + if (node->time_in_stale[HSR_DEV_SLAVE_B]) + *if2_age = INT_MAX; +#if HZ <= MSEC_PER_SEC + else if (tdiff > msecs_to_jiffies(INT_MAX)) + *if2_age = INT_MAX; +#endif + else + *if2_age = jiffies_to_msecs(tdiff); + + /* Present sequence numbers as if they were incoming on interface */ + *if1_seq = node->seq_out[HSR_DEV_SLAVE_B]; + *if2_seq = node->seq_out[HSR_DEV_SLAVE_A]; + + if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if]) + *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex; + else + *addr_b_ifindex = -1; + + rcu_read_unlock(); + + return 0; +} diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h new file mode 100644 index 000000000000..e6c4022030ad --- /dev/null +++ b/net/hsr/hsr_framereg.h @@ -0,0 +1,53 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef _HSR_FRAMEREG_H +#define _HSR_FRAMEREG_H + +#include "hsr_main.h" + +struct node_entry; + +struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb); + +struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, + struct node_entry *node, + struct sk_buff *skb, + enum hsr_dev_idx dev_idx); + +void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb); +void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, + enum hsr_dev_idx dev_idx); + +void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx); + +int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, + struct sk_buff *skb); + +void hsr_prune_nodes(struct hsr_priv *hsr_priv); + +int hsr_create_self_node(struct list_head *self_node_db, + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]); + +void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, + unsigned char addr[ETH_ALEN]); + +int hsr_get_node_data(struct hsr_priv *hsr_priv, + const unsigned char *addr, + unsigned char addr_b[ETH_ALEN], + unsigned int *addr_b_ifindex, + int *if1_age, + u16 *if1_seq, + int *if2_age, + u16 *if2_seq); + +#endif /* _HSR_FRAMEREG_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c new file mode 100644 index 000000000000..af68dd83a4e3 --- /dev/null +++ b/net/hsr/hsr_main.c @@ -0,0 +1,469 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * In addition to routines for registering and unregistering HSR support, this + * file also contains the receive routine that handles all incoming frames with + * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling. + */ + +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/timer.h> +#include <linux/etherdevice.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_netlink.h" +#include "hsr_framereg.h" + + +/* List of all registered virtual HSR devices */ +static LIST_HEAD(hsr_list); + +void register_hsr_master(struct hsr_priv *hsr_priv) +{ + list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list); +} + +void unregister_hsr_master(struct hsr_priv *hsr_priv) +{ + struct hsr_priv *hsr_priv_it; + + list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list) + if (hsr_priv_it == hsr_priv) { + list_del_rcu(&hsr_priv_it->hsr_list); + return; + } +} + +bool is_hsr_slave(struct net_device *dev) +{ + struct hsr_priv *hsr_priv_it; + + list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) { + if (dev == hsr_priv_it->slave[0]) + return true; + if (dev == hsr_priv_it->slave[1]) + return true; + } + + return false; +} + + +/* If dev is a HSR slave device, return the virtual master device. Return NULL + * otherwise. + */ +static struct hsr_priv *get_hsr_master(struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + rcu_read_lock(); + list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) + if ((dev == hsr_priv->slave[0]) || + (dev == hsr_priv->slave[1])) { + rcu_read_unlock(); + return hsr_priv; + } + + rcu_read_unlock(); + return NULL; +} + + +/* If dev is a HSR slave device, return the other slave device. Return NULL + * otherwise. + */ +static struct net_device *get_other_slave(struct hsr_priv *hsr_priv, + struct net_device *dev) +{ + if (dev == hsr_priv->slave[0]) + return hsr_priv->slave[1]; + if (dev == hsr_priv->slave[1]) + return hsr_priv->slave[0]; + + return NULL; +} + + +static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *slave, *other_slave; + struct hsr_priv *hsr_priv; + int old_operstate; + int mtu_max; + int res; + struct net_device *dev; + + dev = netdev_notifier_info_to_dev(ptr); + + hsr_priv = get_hsr_master(dev); + if (hsr_priv) { + /* dev is a slave device */ + slave = dev; + other_slave = get_other_slave(hsr_priv, slave); + } else { + if (!is_hsr_master(dev)) + return NOTIFY_DONE; + hsr_priv = netdev_priv(dev); + slave = hsr_priv->slave[0]; + other_slave = hsr_priv->slave[1]; + } + + switch (event) { + case NETDEV_UP: /* Administrative state DOWN */ + case NETDEV_DOWN: /* Administrative state UP */ + case NETDEV_CHANGE: /* Link (carrier) state changes */ + old_operstate = hsr_priv->dev->operstate; + hsr_set_carrier(hsr_priv->dev, slave, other_slave); + /* netif_stacked_transfer_operstate() cannot be used here since + * it doesn't set IF_OPER_LOWERLAYERDOWN (?) + */ + hsr_set_operstate(hsr_priv->dev, slave, other_slave); + hsr_check_announce(hsr_priv->dev, old_operstate); + break; + case NETDEV_CHANGEADDR: + + /* This should not happen since there's no ndo_set_mac_address() + * for HSR devices - i.e. not supported. + */ + if (dev == hsr_priv->dev) + break; + + if (dev == hsr_priv->slave[0]) + memcpy(hsr_priv->dev->dev_addr, + hsr_priv->slave[0]->dev_addr, ETH_ALEN); + + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr_priv->self_node_db, + hsr_priv->dev->dev_addr, + hsr_priv->slave[1] ? + hsr_priv->slave[1]->dev_addr : + hsr_priv->dev->dev_addr); + if (res) + netdev_warn(hsr_priv->dev, + "Could not update HSR node address.\n"); + + if (dev == hsr_priv->slave[0]) + call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev); + break; + case NETDEV_CHANGEMTU: + if (dev == hsr_priv->dev) + break; /* Handled in ndo_change_mtu() */ + mtu_max = hsr_get_max_mtu(hsr_priv); + if (hsr_priv->dev->mtu > mtu_max) + dev_set_mtu(hsr_priv->dev, mtu_max); + break; + case NETDEV_UNREGISTER: + if (dev == hsr_priv->slave[0]) + hsr_priv->slave[0] = NULL; + if (dev == hsr_priv->slave[1]) + hsr_priv->slave[1] = NULL; + + /* There should really be a way to set a new slave device... */ + + break; + case NETDEV_PRE_TYPE_CHANGE: + /* HSR works only on Ethernet devices. Refuse slave to change + * its type. + */ + return NOTIFY_BAD; + } + + return NOTIFY_DONE; +} + + +static struct timer_list prune_timer; + +static void prune_nodes_all(unsigned long data) +{ + struct hsr_priv *hsr_priv; + + rcu_read_lock(); + list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) + hsr_prune_nodes(hsr_priv); + rcu_read_unlock(); + + prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&prune_timer); +} + + +static struct sk_buff *hsr_pull_tag(struct sk_buff *skb) +{ + struct hsr_tag *hsr_tag; + struct sk_buff *skb2; + + skb2 = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + goto err_free; + skb = skb2; + + if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN))) + goto err_free; + + hsr_tag = (struct hsr_tag *) skb->data; + skb->protocol = hsr_tag->encap_proto; + skb_pull(skb, HSR_TAGLEN); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} + + +/* The uses I can see for these HSR supervision frames are: + * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = + * 22") to reset any sequence_nr counters belonging to that node. Useful if + * the other node's counter has been reset for some reason. + * -- + * Or not - resetting the counter and bridging the frame would create a + * loop, unfortunately. + * + * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck + * frame is received from a particular node, we know something is wrong. + * We just register these (as with normal frames) and throw them away. + * + * 3) Allow different MAC addresses for the two slave interfaces, using the + * MacAddressA field. + */ +static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb) +{ + struct hsr_sup_tag *hsr_stag; + + if (!ether_addr_equal(eth_hdr(skb)->h_dest, + hsr_priv->sup_multicast_addr)) + return false; + + hsr_stag = (struct hsr_sup_tag *) skb->data; + if (get_hsr_stag_path(hsr_stag) != 0x0f) + return false; + if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + return false; + if (hsr_stag->HSR_TLV_Length != 12) + return false; + + return true; +} + + +/* Implementation somewhat according to IEC-62439-3, p. 43 + */ +static int hsr_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct hsr_priv *hsr_priv; + struct net_device *other_slave; + struct node_entry *node; + bool deliver_to_self; + struct sk_buff *skb_deliver; + enum hsr_dev_idx dev_in_idx, dev_other_idx; + bool dup_out; + int ret; + + hsr_priv = get_hsr_master(dev); + + if (!hsr_priv) { + /* Non-HSR-slave device 'dev' is connected to a HSR network */ + kfree_skb(skb); + dev->stats.rx_errors++; + return NET_RX_SUCCESS; + } + + if (dev == hsr_priv->slave[0]) { + dev_in_idx = HSR_DEV_SLAVE_A; + dev_other_idx = HSR_DEV_SLAVE_B; + } else { + dev_in_idx = HSR_DEV_SLAVE_B; + dev_other_idx = HSR_DEV_SLAVE_A; + } + + node = hsr_find_node(&hsr_priv->self_node_db, skb); + if (node) { + /* Always kill frames sent by ourselves */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* Is this frame a candidate for local reception? */ + deliver_to_self = false; + if ((skb->pkt_type == PACKET_HOST) || + (skb->pkt_type == PACKET_MULTICAST) || + (skb->pkt_type == PACKET_BROADCAST)) + deliver_to_self = true; + else if (ether_addr_equal(eth_hdr(skb)->h_dest, + hsr_priv->dev->dev_addr)) { + skb->pkt_type = PACKET_HOST; + deliver_to_self = true; + } + + + rcu_read_lock(); /* node_db */ + node = hsr_find_node(&hsr_priv->node_db, skb); + + if (is_supervision_frame(hsr_priv, skb)) { + skb_pull(skb, sizeof(struct hsr_sup_tag)); + node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx); + if (!node) { + rcu_read_unlock(); /* node_db */ + kfree_skb(skb); + hsr_priv->dev->stats.rx_dropped++; + return NET_RX_DROP; + } + skb_push(skb, sizeof(struct hsr_sup_tag)); + deliver_to_self = false; + } + + if (!node) { + /* Source node unknown; this might be a HSR frame from + * another net (different multicast address). Ignore it. + */ + rcu_read_unlock(); /* node_db */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* Register ALL incoming frames as outgoing through the other interface. + * This allows us to register frames as incoming only if they are valid + * for the receiving interface, without using a specific counter for + * incoming frames. + */ + dup_out = hsr_register_frame_out(node, dev_other_idx, skb); + if (!dup_out) + hsr_register_frame_in(node, dev_in_idx); + + /* Forward this frame? */ + if (!dup_out && (skb->pkt_type != PACKET_HOST)) + other_slave = get_other_slave(hsr_priv, dev); + else + other_slave = NULL; + + if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb)) + deliver_to_self = false; + + rcu_read_unlock(); /* node_db */ + + if (!deliver_to_self && !other_slave) { + kfree_skb(skb); + /* Circulated frame; silently remove it. */ + return NET_RX_SUCCESS; + } + + skb_deliver = skb; + if (deliver_to_self && other_slave) { + /* skb_clone() is not enough since we will strip the hsr tag + * and do address substitution below + */ + skb_deliver = pskb_copy(skb, GFP_ATOMIC); + if (!skb_deliver) { + deliver_to_self = false; + hsr_priv->dev->stats.rx_dropped++; + } + } + + if (deliver_to_self) { + bool multicast_frame; + + skb_deliver = hsr_pull_tag(skb_deliver); + if (!skb_deliver) { + hsr_priv->dev->stats.rx_dropped++; + goto forward; + } +#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + /* Move everything in the header that is after the HSR tag, + * to work around alignment problems caused by the 6-byte HSR + * tag. In practice, this removes/overwrites the HSR tag in + * the header and restores a "standard" packet. + */ + memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data, + skb_headlen(skb_deliver)); + + /* Adjust skb members so they correspond with the move above. + * This cannot possibly underflow skb->data since hsr_pull_tag() + * above succeeded. + * At this point in the protocol stack, the transport and + * network headers have not been set yet, and we haven't touched + * the mac header nor the head. So we only need to adjust data + * and tail: + */ + skb_deliver->data -= HSR_TAGLEN; + skb_deliver->tail -= HSR_TAGLEN; +#endif + skb_deliver->dev = hsr_priv->dev; + hsr_addr_subst_source(hsr_priv, skb_deliver); + multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST); + ret = netif_rx(skb_deliver); + if (ret == NET_RX_DROP) { + hsr_priv->dev->stats.rx_dropped++; + } else { + hsr_priv->dev->stats.rx_packets++; + hsr_priv->dev->stats.rx_bytes += skb->len; + if (multicast_frame) + hsr_priv->dev->stats.multicast++; + } + } + +forward: + if (other_slave) { + skb_push(skb, ETH_HLEN); + skb->dev = other_slave; + dev_queue_xmit(skb); + } + + return NET_RX_SUCCESS; +} + + +static struct packet_type hsr_pt __read_mostly = { + .type = htons(ETH_P_PRP), + .func = hsr_rcv, +}; + +static struct notifier_block hsr_nb = { + .notifier_call = hsr_netdev_notify, /* Slave event notifications */ +}; + + +static int __init hsr_init(void) +{ + int res; + + BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN); + + dev_add_pack(&hsr_pt); + + init_timer(&prune_timer); + prune_timer.function = prune_nodes_all; + prune_timer.data = 0; + prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&prune_timer); + + register_netdevice_notifier(&hsr_nb); + + res = hsr_netlink_init(); + + return res; +} + +static void __exit hsr_exit(void) +{ + unregister_netdevice_notifier(&hsr_nb); + del_timer(&prune_timer); + hsr_netlink_exit(); + dev_remove_pack(&hsr_pt); +} + +module_init(hsr_init); +module_exit(hsr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h new file mode 100644 index 000000000000..56fe060c0ab1 --- /dev/null +++ b/net/hsr/hsr_main.h @@ -0,0 +1,166 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef _HSR_PRIVATE_H +#define _HSR_PRIVATE_H + +#include <linux/netdevice.h> +#include <linux/list.h> + + +/* Time constants as specified in the HSR specification (IEC-62439-3 2010) + * Table 8. + * All values in milliseconds. + */ +#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ +#define HSR_NODE_FORGET_TIME 60000 /* ms */ +#define HSR_ANNOUNCE_INTERVAL 100 /* ms */ + + +/* By how much may slave1 and slave2 timestamps of latest received frame from + * each node differ before we notify of communication problem? + */ +#define MAX_SLAVE_DIFF 3000 /* ms */ + + +/* How often shall we check for broken ring and remove node entries older than + * HSR_NODE_FORGET_TIME? + */ +#define PRUNE_PERIOD 3000 /* ms */ + + +#define HSR_TLV_ANNOUNCE 22 +#define HSR_TLV_LIFE_CHECK 23 + + +/* HSR Tag. + * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, + * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, + * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, + * encapsulated protocol } instead. + */ +#define HSR_TAGLEN 6 + +/* Field names below as defined in the IEC:2010 standard for HSR. */ +struct hsr_tag { + __be16 path_and_LSDU_size; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + + +/* The helper functions below assumes that 'path' occupies the 4 most + * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or + * equivalently, the 4 most significant bits of HSR tag byte 14). + * + * This is unclear in the IEC specification; its definition of MAC addresses + * indicates the spec is written with the least significant bit first (to the + * left). This, however, would mean that the LSDU field would be split in two + * with the path field in-between, which seems strange. I'm guessing the MAC + * address definition is in error. + */ +static inline u16 get_hsr_tag_path(struct hsr_tag *ht) +{ + return ntohs(ht->path_and_LSDU_size) >> 12; +} + +static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht) +{ + return ntohs(ht->path_and_LSDU_size) & 0x0FFF; +} + +static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path) +{ + ht->path_and_LSDU_size = htons( + (ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12)); +} + +static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size) +{ + ht->path_and_LSDU_size = htons( + (ntohs(ht->path_and_LSDU_size) & 0xF000) | + (LSDU_size & 0x0FFF)); +} + +struct hsr_ethhdr { + struct ethhdr ethhdr; + struct hsr_tag hsr_tag; +} __packed; + + +/* HSR Supervision Frame data types. + * Field names as defined in the IEC:2010 standard for HSR. + */ +struct hsr_sup_tag { + __be16 path_and_HSR_Ver; + __be16 sequence_nr; + __u8 HSR_TLV_Type; + __u8 HSR_TLV_Length; +} __packed; + +struct hsr_sup_payload { + unsigned char MacAddressA[ETH_ALEN]; +} __packed; + +static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst) +{ + return get_hsr_tag_path((struct hsr_tag *) hst); +} + +static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst) +{ + return get_hsr_tag_LSDU_size((struct hsr_tag *) hst); +} + +static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path) +{ + set_hsr_tag_path((struct hsr_tag *) hst, path); +} + +static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) +{ + set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); +} + +struct hsr_ethhdr_sp { + struct ethhdr ethhdr; + struct hsr_sup_tag hsr_sup; +} __packed; + + +enum hsr_dev_idx { + HSR_DEV_NONE = -1, + HSR_DEV_SLAVE_A = 0, + HSR_DEV_SLAVE_B, + HSR_DEV_MASTER, +}; +#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1) +#define HSR_MAX_DEV (HSR_DEV_MASTER + 1) + +struct hsr_priv { + struct list_head hsr_list; /* List of hsr devices */ + struct rcu_head rcu_head; + struct net_device *dev; + struct net_device *slave[HSR_MAX_SLAVE]; + struct list_head node_db; /* Other HSR nodes */ + struct list_head self_node_db; /* MACs of slaves */ + struct timer_list announce_timer; /* Supervision frame dispatch */ + int announce_count; + u16 sequence_nr; + spinlock_t seqnr_lock; /* locking for sequence_nr */ + unsigned char sup_multicast_addr[ETH_ALEN]; +}; + +void register_hsr_master(struct hsr_priv *hsr_priv); +void unregister_hsr_master(struct hsr_priv *hsr_priv); +bool is_hsr_slave(struct net_device *dev); + +#endif /* _HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c new file mode 100644 index 000000000000..4e66bf61f585 --- /dev/null +++ b/net/hsr/hsr_netlink.c @@ -0,0 +1,457 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * Routines for handling Netlink messages for HSR. + */ + +#include "hsr_netlink.h" +#include <linux/kernel.h> +#include <net/rtnetlink.h> +#include <net/genetlink.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_framereg.h" + +static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { + [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, + [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, + [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, +}; + + +/* Here, it seems a netdevice has already been allocated for us, and the + * hsr_dev_setup routine has been executed. Nice! + */ +static int hsr_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *link[2]; + unsigned char multicast_spec; + + if (!data[IFLA_HSR_SLAVE1]) { + netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n"); + return -EINVAL; + } + link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); + if (!data[IFLA_HSR_SLAVE2]) { + netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n"); + return -EINVAL; + } + link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); + + if (!link[0] || !link[1]) + return -ENODEV; + if (link[0] == link[1]) + return -EINVAL; + + if (!data[IFLA_HSR_MULTICAST_SPEC]) + multicast_spec = 0; + else + multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); + + return hsr_dev_finalize(dev, link, multicast_spec); +} + +static struct rtnl_link_ops hsr_link_ops __read_mostly = { + .kind = "hsr", + .maxtype = IFLA_HSR_MAX, + .policy = hsr_policy, + .priv_size = sizeof(struct hsr_priv), + .setup = hsr_dev_setup, + .newlink = hsr_newlink, +}; + + + +/* attribute policy */ +/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */ +static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { + [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_IFINDEX] = { .type = NLA_U32 }, + [HSR_A_IF1_AGE] = { .type = NLA_U32 }, + [HSR_A_IF2_AGE] = { .type = NLA_U32 }, + [HSR_A_IF1_SEQ] = { .type = NLA_U16 }, + [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, +}; + +static struct genl_family hsr_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, +}; + +static struct genl_multicast_group hsr_network_genl_mcgrp = { + .name = "hsr-network", +}; + + + +/* This is called if for some node with MAC address addr, we only get frames + * over one of the slave interfaces. This would indicate an open network ring + * (i.e. a link has failed somewhere). + */ +void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], + enum hsr_dev_idx dev_idx) +{ + struct sk_buff *skb; + void *msg_head; + int res; + int ifindex; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + goto fail; + + msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR); + if (!msg_head) + goto nla_put_failure; + + res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) + goto nla_put_failure; + + if (hsr_priv->slave[dev_idx]) + ifindex = hsr_priv->slave[dev_idx]->ifindex; + else + ifindex = -1; + res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex); + if (res < 0) + goto nla_put_failure; + + genlmsg_end(skb, msg_head); + genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC); + + return; + +nla_put_failure: + kfree_skb(skb); + +fail: + netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n"); +} + +/* This is called when we haven't heard from the node with MAC address addr for + * some time (just before the node is removed from the node table/list). + */ +void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]) +{ + struct sk_buff *skb; + void *msg_head; + int res; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + goto fail; + + msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_NODE_DOWN); + if (!msg_head) + goto nla_put_failure; + + + res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) + goto nla_put_failure; + + genlmsg_end(skb, msg_head); + genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC); + + return; + +nla_put_failure: + kfree_skb(skb); + +fail: + netdev_warn(hsr_priv->dev, "Could not send HSR node down\n"); +} + + +/* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table + * about the status of a specific node in the network, defined by its MAC + * address. + * + * Input: hsr ifindex, node mac address + * Output: hsr ifindex, node mac address (copied from request), + * age of latest frame from node over slave 1, slave 2 [ms] + */ +static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) +{ + /* For receiving */ + struct nlattr *na; + struct net_device *hsr_dev; + + /* For sending */ + struct sk_buff *skb_out; + void *msg_head; + struct hsr_priv *hsr_priv; + unsigned char hsr_node_addr_b[ETH_ALEN]; + int hsr_node_if1_age; + u16 hsr_node_if1_seq; + int hsr_node_if2_age; + u16 hsr_node_if2_seq; + int addr_b_ifindex; + int res; + + if (!info) + goto invalid; + + na = info->attrs[HSR_A_IFINDEX]; + if (!na) + goto invalid; + na = info->attrs[HSR_A_NODE_ADDR]; + if (!na) + goto invalid; + + hsr_dev = __dev_get_by_index(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); + if (!hsr_dev) + goto invalid; + if (!is_hsr_master(hsr_dev)) + goto invalid; + + + /* Send reply */ + + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb_out) { + res = -ENOMEM; + goto fail; + } + + msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_STATUS); + if (!msg_head) { + res = -ENOMEM; + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + + hsr_priv = netdev_priv(hsr_dev); + res = hsr_get_node_data(hsr_priv, + (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]), + hsr_node_addr_b, + &addr_b_ifindex, + &hsr_node_if1_age, + &hsr_node_if1_seq, + &hsr_node_if2_age, + &hsr_node_if2_seq); + if (res < 0) + goto fail; + + res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, + nla_data(info->attrs[HSR_A_NODE_ADDR])); + if (res < 0) + goto nla_put_failure; + + if (addr_b_ifindex > -1) { + res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN, + hsr_node_addr_b); + if (res < 0) + goto nla_put_failure; + + res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex); + if (res < 0) + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IF1_AGE, hsr_node_if1_age); + if (res < 0) + goto nla_put_failure; + res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); + if (res < 0) + goto nla_put_failure; + if (hsr_priv->slave[0]) + res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, + hsr_priv->slave[0]->ifindex); + if (res < 0) + goto nla_put_failure; + + res = nla_put_u32(skb_out, HSR_A_IF2_AGE, hsr_node_if2_age); + if (res < 0) + goto nla_put_failure; + res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); + if (res < 0) + goto nla_put_failure; + if (hsr_priv->slave[1]) + res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, + hsr_priv->slave[1]->ifindex); + + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); + + return 0; + +invalid: + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + return 0; + +nla_put_failure: + kfree_skb(skb_out); + /* Fall through */ + +fail: + return res; +} + +static struct genl_ops hsr_ops_get_node_status = { + .cmd = HSR_C_GET_NODE_STATUS, + .flags = 0, + .policy = hsr_genl_policy, + .doit = hsr_get_node_status, + .dumpit = NULL, +}; + + +/* Get a list of MacAddressA of all nodes known to this node (other than self). + */ +static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) +{ + /* For receiving */ + struct nlattr *na; + struct net_device *hsr_dev; + + /* For sending */ + struct sk_buff *skb_out; + void *msg_head; + struct hsr_priv *hsr_priv; + void *pos; + unsigned char addr[ETH_ALEN]; + int res; + + if (!info) + goto invalid; + + na = info->attrs[HSR_A_IFINDEX]; + if (!na) + goto invalid; + + hsr_dev = __dev_get_by_index(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); + if (!hsr_dev) + goto invalid; + if (!is_hsr_master(hsr_dev)) + goto invalid; + + + /* Send reply */ + + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb_out) { + res = -ENOMEM; + goto fail; + } + + msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_LIST); + if (!msg_head) { + res = -ENOMEM; + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + + hsr_priv = netdev_priv(hsr_dev); + + rcu_read_lock(); + pos = hsr_get_next_node(hsr_priv, NULL, addr); + while (pos) { + res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + pos = hsr_get_next_node(hsr_priv, pos, addr); + } + rcu_read_unlock(); + + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); + + return 0; + +invalid: + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + return 0; + +nla_put_failure: + kfree_skb(skb_out); + /* Fall through */ + +fail: + return res; +} + + +static struct genl_ops hsr_ops_get_node_list = { + .cmd = HSR_C_GET_NODE_LIST, + .flags = 0, + .policy = hsr_genl_policy, + .doit = hsr_get_node_list, + .dumpit = NULL, +}; + +int __init hsr_netlink_init(void) +{ + int rc; + + rc = rtnl_link_register(&hsr_link_ops); + if (rc) + goto fail_rtnl_link_register; + + rc = genl_register_family(&hsr_genl_family); + if (rc) + goto fail_genl_register_family; + + rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_status); + if (rc) + goto fail_genl_register_ops; + + rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_list); + if (rc) + goto fail_genl_register_ops_node_list; + + rc = genl_register_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp); + if (rc) + goto fail_genl_register_mc_group; + + return 0; + +fail_genl_register_mc_group: + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_list); +fail_genl_register_ops_node_list: + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status); +fail_genl_register_ops: + genl_unregister_family(&hsr_genl_family); +fail_genl_register_family: + rtnl_link_unregister(&hsr_link_ops); +fail_rtnl_link_register: + + return rc; +} + +void __exit hsr_netlink_exit(void) +{ + genl_unregister_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp); + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status); + genl_unregister_family(&hsr_genl_family); + + rtnl_link_unregister(&hsr_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("hsr"); diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h new file mode 100644 index 000000000000..d4579dcc3c7d --- /dev/null +++ b/net/hsr/hsr_netlink.h @@ -0,0 +1,30 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __HSR_NETLINK_H +#define __HSR_NETLINK_H + +#include <linux/if_ether.h> +#include <linux/module.h> +#include <uapi/linux/hsr_netlink.h> + +struct hsr_priv; + +int __init hsr_netlink_init(void); +void __exit hsr_netlink_exit(void); + +void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], + int dev_idx); +void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]); +void hsr_nl_framedrop(int dropcount, int dev_idx); +void hsr_nl_linkdown(int dev_idx); + +#endif /* __HSR_NETLINK_H */ diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index fde90e63027d..9497c6f3276b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -654,7 +654,9 @@ static int lowpan_header_create(struct sk_buff *skb, head[1] = iphc1; skb_pull(skb, sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + skb_reset_network_header(skb); lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -737,7 +739,6 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) return -ENOMEM; skb_push(new, sizeof(struct ipv6hdr)); - skb_reset_network_header(new); skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); new->protocol = htons(ETH_P_IPV6); @@ -1059,7 +1060,6 @@ lowpan_process_data(struct sk_buff *skb) skb = new; skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); lowpan_raw_dump_table(__func__, "raw UDP header dump", @@ -1102,17 +1102,6 @@ static int lowpan_set_address(struct net_device *dev, void *p) return 0; } -static int lowpan_get_mac_header_length(struct sk_buff *skb) -{ - /* - * Currently long addressing mode is supported only, so the overall - * header size is 21: - * FC SeqNum DPAN DA SA Sec - * 2 + 1 + 2 + 8 + 8 + 0 = 21 - */ - return 21; -} - static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, int mlen, int plen, int offset, int type) @@ -1133,12 +1122,15 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, frag->priority = skb->priority; /* copy header, MFR and payload */ - memcpy(skb_put(frag, mlen), skb->data, mlen); - memcpy(skb_put(frag, hlen), head, hlen); + skb_put(frag, mlen); + skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen); + + skb_put(frag, hlen); + skb_copy_to_linear_data_offset(frag, mlen, head, hlen); - if (plen) - skb_copy_from_linear_data_offset(skb, offset + mlen, - skb_put(frag, plen), plen); + skb_put(frag, plen); + skb_copy_to_linear_data_offset(frag, mlen + hlen, + skb_network_header(skb) + offset, plen); lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); @@ -1152,7 +1144,7 @@ lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) int err, header_length, payload_length, tag, offset = 0; u8 head[5]; - header_length = lowpan_get_mac_header_length(skb); + header_length = skb->mac_len; payload_length = skb->len - header_length; tag = lowpan_dev_info(dev)->fragment_tag++; @@ -1323,8 +1315,6 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, /* Pull off the 1-byte of 6lowpan header. */ skb_pull(local_skb, 1); - skb_reset_network_header(local_skb); - skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); lowpan_give_skb_to_devices(local_skb); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 109ee89f123e..7785b28061ac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -121,7 +121,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct aead_givcrypt_request *req; struct scatterlist *sg; struct scatterlist *asg; - struct esp_data *esp; struct sk_buff *trailer; void *tmp; u8 *iv; @@ -139,8 +138,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - esp = x->data; - aead = esp->aead; + aead = x->data; alen = crypto_aead_authsize(aead); tfclen = 0; @@ -154,8 +152,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); - if (esp->padlen) - clen = ALIGN(clen, esp->padlen); plen = clen - skb->len - tfclen; err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -280,8 +276,7 @@ static int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int elen = skb->len - hlen; @@ -376,8 +371,7 @@ static void esp_input_done(struct crypto_async_request *base, int err) static int esp_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -459,9 +453,8 @@ out: static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) { - struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); - u32 align = max_t(u32, blksize, esp->padlen); + struct crypto_aead *aead = x->data; + u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4); unsigned int net_adj; switch (x->props.mode) { @@ -476,8 +469,8 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) BUG(); } - return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + net_adj - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } static void esp4_err(struct sk_buff *skb, u32 info) @@ -511,18 +504,16 @@ static void esp4_err(struct sk_buff *skb, u32 info) static void esp_destroy(struct xfrm_state *x) { - struct esp_data *esp = x->data; + struct crypto_aead *aead = x->data; - if (!esp) + if (!aead) return; - crypto_free_aead(esp->aead); - kfree(esp); + crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; int err; @@ -531,7 +522,7 @@ static int esp_init_aead(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); @@ -548,7 +539,6 @@ error: static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; @@ -583,7 +573,7 @@ static int esp_init_authenc(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -638,16 +628,11 @@ error: static int esp_init_state(struct xfrm_state *x) { - struct esp_data *esp; struct crypto_aead *aead; u32 align; int err; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + x->data = NULL; if (x->aead) err = esp_init_aead(x); @@ -657,9 +642,7 @@ static int esp_init_state(struct xfrm_state *x) if (err) goto error; - aead = esp->aead; - - esp->padlen = 0; + aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -683,9 +666,7 @@ static int esp_init_state(struct xfrm_state *x) } align = ALIGN(crypto_aead_blocksize(aead), 4); - if (esp->padlen) - align = max_t(u32, align, esp->padlen); - x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); + x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85a4f21aac1a..59da7cde0724 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d23118d95ff9..718dfbd30cbe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb, addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a2e2b61cd7da..2510c02c2d21 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -28,6 +28,7 @@ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/checksum.h> #include <net/ip.h> @@ -57,15 +58,21 @@ struct clusterip_config { struct rcu_head rcu; }; -static LIST_HEAD(clusterip_configs); +#ifdef CONFIG_PROC_FS +static const struct file_operations clusterip_proc_fops; +#endif -/* clusterip_lock protects the clusterip_configs list */ -static DEFINE_SPINLOCK(clusterip_lock); +static int clusterip_net_id __read_mostly; + +struct clusterip_net { + struct list_head configs; + /* lock protects the configs list */ + spinlock_t lock; #ifdef CONFIG_PROC_FS -static const struct file_operations clusterip_proc_fops; -static struct proc_dir_entry *clusterip_procdir; + struct proc_dir_entry *procdir; #endif +}; static inline void clusterip_config_get(struct clusterip_config *c) @@ -92,10 +99,13 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { + struct net *net = dev_net(c->dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + local_bh_disable(); - if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + if (atomic_dec_and_lock(&c->entries, &cn->lock)) { list_del_rcu(&c->list); - spin_unlock(&clusterip_lock); + spin_unlock(&cn->lock); local_bh_enable(); dev_mc_del(c->dev, c->clustermac); @@ -113,11 +123,12 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(__be32 clusterip) +__clusterip_config_find(struct net *net, __be32 clusterip) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(net, clusterip_net_id); - list_for_each_entry_rcu(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &cn->configs, list) { if (c->clusterip == clusterip) return c; } @@ -126,12 +137,12 @@ __clusterip_config_find(__be32 clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(__be32 clusterip, int entry) +clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) { struct clusterip_config *c; rcu_read_lock_bh(); - c = __clusterip_config_find(clusterip); + c = __clusterip_config_find(net, clusterip); if (c) { if (unlikely(!atomic_inc_not_zero(&c->refcount))) c = NULL; @@ -158,6 +169,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) @@ -180,7 +192,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, /* create proc dir entry */ sprintf(buffer, "%pI4", &ip); c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir, + cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { kfree(c); @@ -189,9 +201,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, } #endif - spin_lock_bh(&clusterip_lock); - list_add_rcu(&c->list, &clusterip_configs); - spin_unlock_bh(&clusterip_lock); + spin_lock_bh(&cn->lock); + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); return c; } @@ -370,7 +382,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { pr_info("no config found for %pI4, need 'new'\n", @@ -384,7 +396,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - dev = dev_get_by_name(&init_net, e->ip.iniface); + dev = dev_get_by_name(par->net, e->ip.iniface); if (!dev) { pr_info("no such interface %s\n", e->ip.iniface); @@ -492,6 +504,7 @@ arp_mangle(const struct nf_hook_ops *ops, struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; + struct net *net = dev_net(in ? in : out); /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -508,7 +521,7 @@ arp_mangle(const struct nf_hook_ops *ops, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip, 0); + c = clusterip_config_find_get(net, payload->src_ip, 0); if (!c) return NF_ACCEPT; @@ -698,48 +711,75 @@ static const struct file_operations clusterip_proc_fops = { #endif /* CONFIG_PROC_FS */ +static int clusterip_net_init(struct net *net) +{ + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + INIT_LIST_HEAD(&cn->configs); + + spin_lock_init(&cn->lock); + +#ifdef CONFIG_PROC_FS + cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); + if (!cn->procdir) { + pr_err("Unable to proc dir entry\n"); + return -ENOMEM; + } +#endif /* CONFIG_PROC_FS */ + + return 0; +} + +static void clusterip_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + proc_remove(cn->procdir); +#endif +} + +static struct pernet_operations clusterip_net_ops = { + .init = clusterip_net_init, + .exit = clusterip_net_exit, + .id = &clusterip_net_id, + .size = sizeof(struct clusterip_net), +}; + static int __init clusterip_tg_init(void) { int ret; - ret = xt_register_target(&clusterip_tg_reg); + ret = register_pernet_subsys(&clusterip_net_ops); if (ret < 0) return ret; + ret = xt_register_target(&clusterip_tg_reg); + if (ret < 0) + goto cleanup_subsys; + ret = nf_register_hook(&cip_arp_ops); if (ret < 0) goto cleanup_target; -#ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); - if (!clusterip_procdir) { - pr_err("Unable to proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_hook; - } -#endif /* CONFIG_PROC_FS */ - pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); + return 0; -#ifdef CONFIG_PROC_FS -cleanup_hook: - nf_unregister_hook(&cip_arp_ops); -#endif /* CONFIG_PROC_FS */ cleanup_target: xt_unregister_target(&clusterip_tg_reg); +cleanup_subsys: + unregister_pernet_subsys(&clusterip_net_ops); return ret; } static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); -#ifdef CONFIG_PROC_FS - proc_remove(clusterip_procdir); -#endif + nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + unregister_pernet_subsys(&clusterip_net_ops); /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ rcu_barrier_bh(); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cbc22158af49..9cb993cd224b 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ if (skb->tstamp.tv64 == 0) @@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net, } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 && skb->mac_header != skb->network_header && @@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net, if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. */ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 8f7536be1322..0f4cbfeb19bd 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -16,7 +16,6 @@ #include <net/netfilter/nf_tables.h> #include <net/net_namespace.h> #include <net/ip.h> -#include <net/net_namespace.h> #include <net/netfilter/nf_tables_ipv4.h> static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d5b1390eebbe..3d69ec8dac57 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -701,13 +701,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_allowed_congestion_control, }, { - .procname = "tcp_max_ssthresh", - .data = &sysctl_tcp_max_ssthresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index f45e1c242440..821846fb0a7e 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -140,7 +140,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -149,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 019c2389a341..ad37bf18ae4b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -15,8 +15,6 @@ #include <linux/gfp.h> #include <net/tcp.h> -int sysctl_tcp_max_ssthresh = 0; - static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); @@ -299,35 +297,24 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) } EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); -/* - * Slow start is used when congestion window is less than slow start - * threshold. This version implements the basic RFC2581 version - * and optionally supports: - * RFC3742 Limited Slow Start - growth limited to max_ssthresh - * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged +/* Slow start is used when congestion window is no greater than the slow start + * threshold. We base on RFC2581 and also handle stretch ACKs properly. + * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but + * something better;) a packet is only considered (s)acked in its entirety to + * defend the ACK attacks described in the RFC. Slow start processes a stretch + * ACK of degree N as if N acks of degree 1 are received back to back except + * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and + * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ -void tcp_slow_start(struct tcp_sock *tp) +int tcp_slow_start(struct tcp_sock *tp, u32 acked) { - int cnt; /* increase in packets */ - unsigned int delta = 0; - u32 snd_cwnd = tp->snd_cwnd; - - if (unlikely(!snd_cwnd)) { - pr_err_once("snd_cwnd is nul, please report this bug.\n"); - snd_cwnd = 1U; - } + u32 cwnd = tp->snd_cwnd + acked; - if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) - cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ - else - cnt = snd_cwnd; /* exponential increase */ - - tp->snd_cwnd_cnt += cnt; - while (tp->snd_cwnd_cnt >= snd_cwnd) { - tp->snd_cwnd_cnt -= snd_cwnd; - delta++; - } - tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp); + if (cwnd > tp->snd_ssthresh) + cwnd = tp->snd_ssthresh + 1; + acked -= cwnd - tp->snd_cwnd; + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); @@ -351,7 +338,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -360,7 +347,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); /* In dangerous area, increase slowly. */ else tcp_cong_avoid_ai(tp, tp->snd_cwnd); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6ae92a51f58..828e4c3ffbaf 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -304,7 +304,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -315,7 +316,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (tp->snd_cwnd <= tp->snd_ssthresh) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); - tcp_slow_start(tp); + tcp_slow_start(tp, acked); } else { bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 766032b4a6c3..f195d9316e55 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -8,7 +8,7 @@ #include <net/inetpeer.h> #include <net/tcp.h> -int sysctl_tcp_fastopen __read_mostly; +int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 30f27f6b3655..8ed9305dfdf4 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,7 +109,7 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight) +static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); @@ -118,7 +118,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight) return; if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else { /* Update AIMD parameters. * diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index c1a8175361e8..4a194acfd923 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -227,7 +227,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); @@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else { /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 57bdd17dff4d..478fe82611bf 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,7 +85,8 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca = inet_csk_ca(sk); @@ -102,7 +103,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; if (!ca->hybla_en) { - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); return; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 834857f3c871..8a520996f3d2 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -256,7 +256,8 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. */ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); @@ -270,7 +271,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In slow start */ if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else { u32 delta; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b935397c703c..c53b7f35c51d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2903,7 +2903,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + flag & FLAG_ACKED) seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; if (seq_rtt < 0) @@ -2918,20 +2919,25 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) +static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); s32 seq_rtt = -1; - if (tp->lsndtime && !tp->total_retrans) - seq_rtt = tcp_time_stamp - tp->lsndtime; - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (synack_stamp && !tp->total_retrans) + seq_rtt = tcp_time_stamp - synack_stamp; + + /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets + * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() + */ + if (!tp->srtt) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight); + icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -3028,6 +3034,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); + bool rtt_update; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3104,14 +3111,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) || - (flag & FLAG_ACKED)) - tcp_rearm_rto(sk); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3150,6 +3156,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_ops->pkts_acked(sk, pkts_acked, rtt_us); } + } else if (skb && rtt_update && sack_rtt >= 0 && + sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + /* Do not re-arm RTO if the sack RTT is measured from data sent + * after when the head was last (re)transmitted. Otherwise the + * timeout may continue to extend in loss recovery. + */ + tcp_rearm_rto(sk); } #if FASTRETRANS_DEBUG > 0 @@ -3441,7 +3454,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* Advance cwnd if state allows */ if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, prior_in_flight); + tcp_cong_avoid(sk, ack, acked, prior_in_flight); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -5626,6 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct request_sock *req; int queued = 0; bool acceptable; + u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5708,9 +5722,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * so release it. */ if (req) { + synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { + synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5733,7 +5749,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, req); + tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 72f7218b03f5..991d62a2f9bb 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,12 +115,13 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); } /** diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a7a5583eab04..a2b68a108eae 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -18,6 +18,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -104,13 +105,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -127,7 +122,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb_tail_pointer(skb) - diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 8ce55b8aaec8..19ea6c2951f3 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,7 +15,8 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -23,7 +24,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 80fa2bfd7ede..06cae62bf208 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,13 +163,14 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) return min(tp->snd_ssthresh, tp->snd_cwnd-1); } -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); return; } @@ -194,7 +195,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); } else { u32 rtt, diff; u64 target_cwnd; @@ -243,7 +244,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ - tcp_slow_start(tp); + tcp_slow_start(tp, acked); } else { /* Congestion avoidance. */ @@ -283,7 +284,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) } /* Use normal slow start */ else if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ac43cd747bce..326475a94865 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -114,13 +114,14 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) tcp_veno_init(sk); } -static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) { - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); return; } @@ -133,7 +134,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, in_flight); + tcp_reno_cong_avoid(sk, ack, acked, in_flight); } else { u64 target_cwnd; u32 rtt; @@ -152,7 +153,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ - tcp_slow_start(tp); + tcp_slow_start(tp, acked); } else { /* Congestion avoidance. */ if (veno->diff < beta) { diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 05c3b6f0e8e1..a347a078ee07 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -69,7 +69,8 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } -static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, + u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); @@ -78,7 +79,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); + tcp_slow_start(tp, acked); else if (!yeah->doing_reno_now) { /* Scalable */ diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index ccde54248c8c..e1a63930a967 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -104,10 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) const struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = skb_dst(skb)->dev->ifindex; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -236,7 +240,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e67e63f9858d..b8719df0366e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -164,10 +164,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) u8 *iv; u8 *tail; __be32 *seqhi; - struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - aead = esp->aead; + aead = x->data; alen = crypto_aead_authsize(aead); tfclen = 0; @@ -181,8 +180,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); - if (esp->padlen) - clen = ALIGN(clen, esp->padlen); plen = clen - skb->len - tfclen; err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -271,8 +268,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int elen = skb->len - hlen; @@ -325,8 +321,7 @@ static void esp_input_done(struct crypto_async_request *base, int err) static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -414,9 +409,8 @@ out: static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) { - struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); - u32 align = max_t(u32, blksize, esp->padlen); + struct crypto_aead *aead = x->data; + u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4); unsigned int net_adj; if (x->props.mode != XFRM_MODE_TUNNEL) @@ -424,8 +418,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) else net_adj = 0; - return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + net_adj - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -454,18 +448,16 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static void esp6_destroy(struct xfrm_state *x) { - struct esp_data *esp = x->data; + struct crypto_aead *aead = x->data; - if (!esp) + if (!aead) return; - crypto_free_aead(esp->aead); - kfree(esp); + crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; int err; @@ -474,7 +466,7 @@ static int esp_init_aead(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); @@ -491,7 +483,6 @@ error: static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; @@ -526,7 +517,7 @@ static int esp_init_authenc(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -581,7 +572,6 @@ error: static int esp6_init_state(struct xfrm_state *x) { - struct esp_data *esp; struct crypto_aead *aead; u32 align; int err; @@ -589,11 +579,7 @@ static int esp6_init_state(struct xfrm_state *x) if (x->encap) return -EINVAL; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + x->data = NULL; if (x->aead) err = esp_init_aead(x); @@ -603,9 +589,7 @@ static int esp6_init_state(struct xfrm_state *x) if (err) goto error; - aead = esp->aead; - - esp->padlen = 0; + aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -625,9 +609,7 @@ static int esp6_init_state(struct xfrm_state *x) } align = ALIGN(crypto_aead_blocksize(aead), 4); - if (esp->padlen) - align = max_t(u32, align, esp->padlen); - x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); + x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c216dc6..710238f58aa9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 56eef30ee5f6..da00a2ecde55 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb) +static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct tcphdr otcph, *tcph; @@ -88,8 +88,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } /* Check checksum. */ - if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, - skb_checksum(oldskb, tcphoff, otcplen, 0))) { + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); return; } @@ -227,7 +226,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb); + send_reset(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1ac0b6e17d95..fd399ac6c1f7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 08ed2772b7aa..5f8e128c512d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,10 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = skb_dst(skb)->dev->ifindex; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; @@ -285,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 2ca2f4dceab7..e24bcf977296 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -208,6 +208,8 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); + skb_reset_mac_header(skb); + skb->mac_len = pos; return pos; } diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index a13e15be7911..f2c7d83dc23f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set, struct mtype *map = set->data; struct nlattr *adt, *nested; void *x; - u32 id, first = cb->args[2]; + u32 id, first = cb->args[IPSET_CB_ARG0]; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->elements; + cb->args[IPSET_CB_ARG0]++) { + id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && @@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set, ipset_nest_end(skb, adt); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e7603c5b53d7..cf99676e69f8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -254,7 +254,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = last_port - first_port + 1; - map->memsize = map->elements * sizeof(unsigned long); + map->memsize = bitmap_bytes(0, map->elements); set->variant = &bitmap_port; set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_port(set, map, first_port, last_port)) { diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dc9284bdd2dd..bac7e01df67f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); - __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); + struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; + if (cb->args[IPSET_CB_ARG0]) { + pr_debug("release set %s\n", + nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + __ip_set_put_byindex(inst, + (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } return 0; } @@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh) } static int -dump_init(struct netlink_callback *cb) +dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); @@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[0] : dump single set/all sets - * [1] : set index - * [..]: type specific + /* cb->args[IPSET_CB_NET]: net namespace + * [IPSET_CB_DUMP]: dump single set/all sets + * [IPSET_CB_INDEX]: set index + * [IPSET_CB_ARG0]: type specific */ if (cda[IPSET_ATTR_SETNAME]) { @@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb) return -ENOENT; dump_type = DUMP_ONE; - cb->args[1] = index; + cb->args[IPSET_CB_INDEX] = index; } else dump_type = DUMP_ALL; @@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb) u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } - cb->args[0] = dump_type; + cb->args[IPSET_CB_NET] = (unsigned long)inst; + cb->args[IPSET_CB_DUMP] = dump_type; return 0; } @@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (!cb->args[0]) { - ret = dump_init(cb); + if (!cb->args[IPSET_CB_DUMP]) { + ret = dump_init(cb, inst); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message @@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= inst->ip_set_max) + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; - dump_type = DUMP_TYPE(cb->args[0]); - dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; + dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); + dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); + max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 + : inst->ip_set_max; dump_last: - pr_debug("args[0]: %u %u args[1]: %ld\n", - dump_type, dump_flags, cb->args[1]); - for (; cb->args[1] < max; cb->args[1]++) { - index = (ip_set_id_t) cb->args[1]; + pr_debug("dump type, flag: %u %u index: %ld\n", + dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); + for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { + index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { @@ -1294,7 +1298,7 @@ dump_last: !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); - if (!cb->args[2]) { + if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); __ip_set_get(set); @@ -1311,7 +1315,7 @@ dump_last: goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; - switch (cb->args[2]) { + switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, @@ -1331,7 +1335,7 @@ dump_last: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; @@ -1340,8 +1344,8 @@ dump_last: /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; - cb->args[0] = dump_type | (dump_flags << 16); - cb->args[1] = 0; + cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); + cb->args[IPSET_CB_INDEX] = 0; goto dump_last; } goto out; @@ -1350,15 +1354,15 @@ nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; + cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else - cb->args[1]++; + cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ - if (ret || !cb->args[2]) { + if (ret || !cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", nfnl_set(inst, index)->name); __ip_set_put_byindex(inst, index); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { @@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); - if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, - .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1961,7 +1962,6 @@ static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6a80dbd30df7..be6932ad3a86 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -234,7 +234,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem IPSET_TOKEN(MTYPE, _elem) #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) @@ -931,7 +930,7 @@ mtype_list(const struct ip_set *set, struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; - u32 first = cb->args[2]; + u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i; @@ -940,20 +939,22 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); + cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); + pr_debug("cb->arg bucket: %lu, t %p n %p\n", + cb->args[IPSET_CB_ARG0], t, n); for (i = 0; i < n->pos; i++) { e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, e); + cb->args[IPSET_CB_ARG0], n, i, e); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { - if (cb->args[2] == first) { + if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); return -EMSGSIZE; } else @@ -968,16 +969,16 @@ mtype_list(const struct ip_set *set, } ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nlmsg_trim(skb, incomplete); - if (unlikely(first == cb->args[2])) { + if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 426032706ca9..2bc2dec20b00 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -137,12 +137,11 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, - }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -160,14 +159,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || @@ -364,12 +363,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK - }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; @@ -386,11 +384,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 363fab933d48..703d1192a6a2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -147,12 +147,11 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -174,8 +173,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; @@ -183,6 +181,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -419,12 +418,11 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; @@ -446,13 +444,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ec6f6d15dded..3e2317f3cf68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[2]; + u32 i, first = cb->args[IPSET_CB_ARG0]; const struct set_elem *e; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[2] < map->size; cb->args[2]++) { - i = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->size; + cb->args[IPSET_CB_ARG0]++) { + i = cb->args[IPSET_CB_ARG0]; e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; @@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set, finish: ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..62786a495cea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); - kfree(dest); + ip_vs_dest_put_and_free(dest); } /* @@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - /* Some dest can be in grace period even before cleanup, we have to - * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. - */ - rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index eff13c94498e..ca056a331e60 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head) struct ip_vs_lblc_entry, rcu_head); - ip_vs_dest_put(en->dest); + ip_vs_dest_put_and_free(en->dest); kfree(en); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0b8550089a2e..3f21a2f47de1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) struct ip_vs_dest_set_elem *e; e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); - ip_vs_dest_put(e->dest); + ip_vs_dest_put_and_free(e->dest); kfree(e); } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 23e596e438b3..2f7ea7564044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_sctphdr_t *sh, _sctph; sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) + if (sh == NULL) { + *verdict = NF_DROP; return 0; + } sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); - if (sch == NULL) + if (sch == NULL) { + *verdict = NF_DROP; return 0; + } + net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); @@ -76,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -87,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + ret = ip_vs_app_pkt_out(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->source != cp->vport || payload_csum || + skb->ip_summed == CHECKSUM_PARTIAL) { + sctph->source = cp->vport; + sctp_nat_csum(skb, sctph, sctphoff); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } @@ -110,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -121,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_in(cp, skb)) + ret = ip_vs_app_pkt_in(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->dest != cp->dport || payload_csum || + (skb->ip_summed == CHECKSUM_PARTIAL && + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + sctph->dest = cp->dport; + sctp_nat_csum(skb, sctph, sctphoff); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 3588faebe529..cc65b2f42cd4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, } -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +/* As ip_vs_sh_get, but with fallback if selected server is unavailable + * + * The fallback strategy loops around the table starting from a "random" + * point (in fact, it is chosen to be the original hash value to make the + * algorithm deterministic) to find a new server. + */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { - unsigned int offset; - unsigned int hash; + unsigned int offset, roffset; + unsigned int hash, ihash; struct ip_vs_dest *dest; + /* first try the dest it's supposed to go to */ + ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + dest = rcu_dereference(s->buckets[ihash].dest); + if (!dest) + return NULL; + if (!is_unavailable(dest)) + return dest; + + IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + + /* if the original dest is unavailable, loop around the table + * starting from ihash to find a new dest + */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { - hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; + hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; - if (is_unavailable(dest)) - IP_VS_DBG_BUF(6, "SH: selected unavailable server " - "%s:%d (offset %d)", - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port), offset); - else + if (!is_unavailable(dest)) return dest; + IP_VS_DBG_BUF(6, "SH: selected unavailable " + "server %s:%d (offset %d), reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), roffset); } return NULL; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2d3030ab5b61..a4b5e2a435ac 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = { unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&acct[dir].packets), - (unsigned long long)atomic64_read(&acct[dir].bytes)); + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); static struct nf_ct_ext_type acct_extend __read_mostly = { - .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), - .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .len = sizeof(struct nf_conn_acct), + .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5d892febd64c..e22d950c60b3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1109,12 +1109,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, acct: if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1126,13 +1128,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, int do_acct) { if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); atomic64_add(skb->len - skb_network_offset(skb), - &acct[CTINFO2DIR(ctinfo)].bytes); + &counter[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eea936b70d15..08870b859046 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -211,13 +211,23 @@ nla_put_failure: } static int -dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, + enum ip_conntrack_dir dir, int type) { - enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nf_conn_counter *counter = acct->counter; struct nlattr *nest_count; + u64 pkts, bytes; - nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); + } else { + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); + } + + nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; @@ -234,24 +244,19 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir, int type) +ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type) { - struct nf_conn_counter *acct; - u64 pkts, bytes; + struct nf_conn_acct *acct = nf_conn_acct_find(ct); - acct = nf_conn_acct_find(ct); if (!acct) return 0; - if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&acct[dir].packets, 0); - bytes = atomic64_xchg(&acct[dir].bytes, 0); - } else { - pkts = atomic64_read(&acct[dir].packets); - bytes = atomic64_read(&acct[dir].bytes); - } - return dump_counters(skb, pkts, bytes, dir); + if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0) + return -1; + if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0) + return -1; + + return 0; } static int @@ -488,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || + ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -530,7 +534,7 @@ ctnetlink_proto_size(const struct nf_conn *ct) } static inline size_t -ctnetlink_counters_size(const struct nf_conn *ct) +ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; @@ -579,7 +583,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ - + ctnetlink_counters_size(ct) + + ctnetlink_acct_size(ct) + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ @@ -673,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_REPLY, type) < 0 || + if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 4811f762e060..a82667c64729 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -634,9 +634,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, static void nft_match_release(void) { - struct nft_xt *nft_match; + struct nft_xt *nft_match, *tmp; - list_for_each_entry(nft_match, &nft_match_list, head) + list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) kfree(nft_match); } @@ -705,9 +705,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, static void nft_target_release(void) { - struct nft_xt *nft_target; + struct nft_xt *nft_target, *tmp; - list_for_each_entry(nft_target, &nft_target_list, head) + list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) kfree(nft_target); } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index b0b87b2d2411..d3b1ffe26181 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -47,8 +47,10 @@ static void nft_nat_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_addr_min) { if (priv->family == AF_INET) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.min_addr.ip = (__force __be32) + data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = (__force __be32) + data[priv->sreg_addr_max].data[0]; } else { memcpy(range.min_addr.ip6, @@ -62,8 +64,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8b03028cca69..227aa11e8409 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table, return NULL; } - table->private = newinfo; newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. + */ + smp_wmb(); + table->private = newinfo; /* * Even though table entries have now been swapped, other CPU's diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 1e2fae32f81b..ed00fef58996 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; + int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { @@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = nfqueue_hash(skb, par); } - return NF_QUEUE_NR(queue); + ret = NF_QUEUE_NR(queue); + if (info->flags & NFQ_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index e595e07a759b..1e634615ab9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; + const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = nf_conn_acct_find(ct); - if (!counters) + acct = nf_conn_acct_find(ct); + if (!acct) return false; + counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3dd0e374bc2b..1ba67931eb1b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,15 +35,6 @@ #include <net/netfilter/nf_conntrack.h> #endif -static void -xt_socket_put_sk(struct sock *sk) -{ - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} - static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -216,7 +207,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; @@ -381,7 +372,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index ea36e99089af..3591cb5dae91 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -9,6 +9,8 @@ openvswitch-y := \ datapath.o \ dp_notify.o \ flow.o \ + flow_netlink.o \ + flow_table.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd7f2b2..1408adc2a2a7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,14 +55,10 @@ #include "datapath.h" #include "flow.h" +#include "flow_netlink.h" #include "vport-internal_dev.h" #include "vport-netdev.h" - -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) -static void rehash_flow_table(struct work_struct *work); -static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); - int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, @@ -165,7 +161,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); + ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -225,6 +221,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) struct dp_stats_percpu *stats; struct sw_flow_key key; u64 *stats_counter; + u32 n_mask_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -237,7 +234,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -262,6 +259,7 @@ out: /* Update datapath statistics. */ u64_stats_update_begin(&stats->sync); (*stats_counter)++; + stats->n_mask_hit += n_mask_hit; u64_stats_update_end(&stats->sync); } @@ -435,7 +433,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); + ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -455,398 +453,6 @@ out: return err; } -/* Called with ovs_mutex. */ -static int flush_flows(struct datapath *dp) -{ - struct flow_table *old_table; - struct flow_table *new_table; - - old_table = ovsl_dereference(dp->table); - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); - if (!new_table) - return -ENOMEM; - - rcu_assign_pointer(dp->table, new_table); - - ovs_flow_tbl_destroy(old_table, true); - return 0; -} - -static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) -{ - - struct sw_flow_actions *acts; - int new_acts_size; - int req_size = NLA_ALIGN(attr_len); - int next_offset = offsetof(struct sw_flow_actions, actions) + - (*sfa)->actions_len; - - if (req_size <= (ksize(*sfa) - next_offset)) - goto out; - - new_acts_size = ksize(*sfa) * 2; - - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) - return ERR_PTR(-EMSGSIZE); - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - - acts = ovs_flow_actions_alloc(new_acts_size); - if (IS_ERR(acts)) - return (void *)acts; - - memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); - acts->actions_len = (*sfa)->actions_len; - kfree(*sfa); - *sfa = acts; - -out: - (*sfa)->actions_len += req_size; - return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); -} - -static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) -{ - struct nlattr *a; - - a = reserve_sfa_size(sfa, nla_attr_size(len)); - if (IS_ERR(a)) - return PTR_ERR(a); - - a->nla_type = attrtype; - a->nla_len = nla_attr_size(len); - - if (data) - memcpy(nla_data(a), data, len); - memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); - - return 0; -} - -static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) -{ - int used = (*sfa)->actions_len; - int err; - - err = add_action(sfa, attrtype, NULL, 0); - if (err) - return err; - - return used; -} - -static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) -{ - struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); - - a->nla_len = sfa->actions_len - st_offset; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa); - -static int validate_and_copy_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; - const struct nlattr *probability, *actions; - const struct nlattr *a; - int rem, start, err, st_acts; - - memset(attrs, 0, sizeof(attrs)); - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) - return -EINVAL; - attrs[type] = a; - } - if (rem) - return -EINVAL; - - probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; - if (!probability || nla_len(probability) != sizeof(u32)) - return -EINVAL; - - actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) - return -EINVAL; - - /* validation done, copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); - if (start < 0) - return start; - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); - if (err) - return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); - if (st_acts < 0) - return st_acts; - - err = validate_and_copy_actions(actions, key, depth + 1, sfa); - if (err) - return err; - - add_nested_action_end(*sfa, st_acts); - add_nested_action_end(*sfa, start); - - return 0; -} - -static int validate_tp_port(const struct sw_flow_key *flow_key) -{ - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } - - return -EINVAL; -} - -static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) -{ - struct sw_flow_match match; - struct sw_flow_key key; - int err, start; - - ovs_match_init(&match, &key, NULL); - err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); - if (err) - return err; - - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); - if (start < 0) - return start; - - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, - sizeof(match.key->tun_key)); - add_nested_action_end(*sfa, start); - - return err; -} - -static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *set_tun) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - - /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) - return -EINVAL; - - if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) - return -EINVAL; - - switch (key_type) { - const struct ovs_key_ipv4 *ipv4_key; - const struct ovs_key_ipv6 *ipv6_key; - int err; - - case OVS_KEY_ATTR_PRIORITY: - case OVS_KEY_ATTR_SKB_MARK: - case OVS_KEY_ATTR_ETHERNET: - break; - - case OVS_KEY_ATTR_TUNNEL: - *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); - if (err) - return err; - break; - - case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; - - if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) - return -EINVAL; - - return validate_tp_port(flow_key); - - default: - return -EINVAL; - } - - return 0; -} - -static int validate_userspace(const struct nlattr *attr) -{ - static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { - [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, - }; - struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; - int error; - - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, - attr, userspace_policy); - if (error) - return error; - - if (!a[OVS_USERSPACE_ATTR_PID] || - !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) - return -EINVAL; - - return 0; -} - -static int copy_action(const struct nlattr *from, - struct sw_flow_actions **sfa) -{ - int totlen = NLA_ALIGN(from->nla_len); - struct nlattr *to; - - to = reserve_sfa_size(sfa, from->nla_len); - if (IS_ERR(to)) - return PTR_ERR(to); - - memcpy(to, from, totlen); - return 0; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *a; - int rem, err; - - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - - nla_for_each_nested(a, attr, rem) { - /* Expected argument lengths, (u32)-1 for variable length. */ - static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { - [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), - [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, - [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), - [OVS_ACTION_ATTR_POP_VLAN] = 0, - [OVS_ACTION_ATTR_SET] = (u32)-1, - [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 - }; - const struct ovs_action_push_vlan *vlan; - int type = nla_type(a); - bool skip_copy; - - if (type > OVS_ACTION_ATTR_MAX || - (action_lens[type] != nla_len(a) && - action_lens[type] != (u32)-1)) - return -EINVAL; - - skip_copy = false; - switch (type) { - case OVS_ACTION_ATTR_UNSPEC: - return -EINVAL; - - case OVS_ACTION_ATTR_USERSPACE: - err = validate_userspace(a); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_OUTPUT: - if (nla_get_u32(a) >= DP_MAX_PORTS) - return -EINVAL; - break; - - - case OVS_ACTION_ATTR_POP_VLAN: - break; - - case OVS_ACTION_ATTR_PUSH_VLAN: - vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) - return -EINVAL; - if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) - return -EINVAL; - break; - - case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); - if (err) - return err; - skip_copy = true; - break; - - default: - return -EINVAL; - } - if (!skip_copy) { - err = copy_action(a, sfa); - if (err) - return err; - } - } - - if (rem > 0) - return -EINVAL; - - return 0; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -902,15 +508,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); + err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; - err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); + err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; @@ -958,15 +565,18 @@ static struct genl_ops dp_packet_genl_ops[] = { } }; -static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) +static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, + struct ovs_dp_megaflow_stats *mega_stats) { - struct flow_table *table; int i; - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); - stats->n_flows = ovs_flow_tbl_count(table); + memset(mega_stats, 0, sizeof(*mega_stats)); + + stats->n_flows = ovs_flow_tbl_count(&dp->table); + mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; + for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; @@ -982,6 +592,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; + mega_stats->n_mask_hit += local_stats.n_mask_hit; } } @@ -1005,100 +616,6 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) -{ - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; - - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); - if (!start) - return -EMSGSIZE; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; - - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = actions_to_attr(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } - } - - nla_nest_end(skb, start); - return err; -} - -static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - struct nlattr *start; - int err; - - switch (key_type) { - case OVS_KEY_ATTR_IPV4_TUNNEL: - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); - if (!start) - return -EMSGSIZE; - - err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), - nla_data(ovs_key)); - if (err) - return err; - nla_nest_end(skb, start); - break; - default: - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) - return -EMSGSIZE; - break; - } - - return 0; -} - -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) -{ - const struct nlattr *a; - int rem, err; - - nla_for_each_attr(a, attr, len, rem) { - int type = nla_type(a); - - switch (type) { - case OVS_ACTION_ATTR_SET: - err = set_action_to_attr(a, skb); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = sample_action_to_attr(a, skb); - if (err) - return err; - break; - default: - if (nla_put(skb, type, nla_len(a), nla_data(a))) - return -EMSGSIZE; - break; - } - } - - return 0; -} - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1135,8 +652,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->unmasked_key, - &flow->unmasked_key, skb); + err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); @@ -1145,7 +661,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); if (err) goto error; @@ -1155,7 +671,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, used = flow->used; stats.n_packets = flow->packet_count; stats.n_bytes = flow->byte_count; - tcp_flags = flow->tcp_flags; + tcp_flags = (u8)ntohs(flow->tcp_flags); spin_unlock_bh(&flow->lock); if (used && @@ -1188,7 +704,8 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_check(flow->sf_acts, lockdep_ovsl_is_held()); - err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + err = ovs_nla_put_actions(sf_acts->actions, + sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); else { @@ -1234,6 +751,14 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, return skb; } +static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); +} + static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1243,7 +768,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -1254,21 +778,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_match_from_nlattrs(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) goto error; - ovs_flow_key_mask(&masked_key, &key, &mask); - error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; @@ -1284,29 +808,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - table = ovsl_dereference(dp->table); - /* Check if this is a duplicate flow */ - flow = ovs_flow_lookup(table, &key); + flow = __ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; - /* Expand table, if necessary, to make room. */ - if (ovs_flow_tbl_need_to_expand(table)) { - struct flow_table *new_table; - - new_table = ovs_flow_tbl_expand(table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(table, true); - table = ovsl_dereference(dp->table); - } - } - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -1317,25 +826,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = masked_key; flow->unmasked_key = key; - - /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(table, &mask); - if (!mask_p) { - /* Allocate a new mask if none exsits. */ - mask_p = ovs_sw_flow_mask_alloc(); - if (!mask_p) - goto err_flow_free; - mask_p->key = mask.key; - mask_p->range = mask.range; - ovs_sw_flow_mask_insert(table, mask_p); - } - - ovs_sw_flow_mask_add_ref(mask_p); - flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_insert(table, flow); + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); + if (error) { + acts = NULL; + goto err_flow_free; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1356,7 +854,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* The unmasked key has to be the same for flow updates. */ error = -EINVAL; - if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { + if (!ovs_flow_cmp_unmasked_key(flow, &match)) { OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); goto err_unlock_ovs; } @@ -1364,7 +862,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_flow_deferred_free_acts(old_acts); + ovs_nla_free_flow_actions(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1403,7 +901,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1413,7 +910,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -1424,9 +921,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = __ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ -1453,7 +949,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1465,18 +960,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } if (!a[OVS_FLOW_ATTR_KEY]) { - err = flush_flows(dp); + err = ovs_flow_tbl_flush(&dp->table); goto unlock; } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = __ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ -1487,7 +981,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_remove(table, flow); + ovs_flow_tbl_remove(&dp->table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); @@ -1506,8 +1000,8 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct table_instance *ti; struct datapath *dp; - struct flow_table *table; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1516,14 +1010,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; } - table = rcu_dereference(dp->table); + ti = rcu_dereference(dp->table.ti); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_dump_next(table, &bucket, &obj); + flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); if (!flow) break; @@ -1589,6 +1083,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size(IFNAMSIZ); msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); return msgsize; } @@ -1598,6 +1093,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; + struct ovs_dp_megaflow_stats dp_megaflow_stats; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, @@ -1613,8 +1109,14 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (err) goto nla_put_failure; - get_dp_stats(dp, &dp_stats); - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) + get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); + if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), + &dp_stats)) + goto nla_put_failure; + + if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, + sizeof(struct ovs_dp_megaflow_stats), + &dp_megaflow_stats)) goto nla_put_failure; return genlmsg_end(skb, ovs_header); @@ -1687,9 +1189,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ - err = -ENOMEM; - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); - if (!dp->table) + err = ovs_flow_tbl_init(&dp->table); + if (err) goto err_free_dp; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); @@ -1699,7 +1200,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) } dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL); if (!dp->ports) { err = -ENOMEM; goto err_destroy_percpu; @@ -1746,7 +1247,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); + ovs_flow_tbl_destroy(&dp->table); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); @@ -2336,32 +1837,6 @@ error: return err; } -static void rehash_flow_table(struct work_struct *work) -{ - struct datapath *dp; - struct net *net; - - ovs_lock(); - rtnl_lock(); - for_each_net(net) { - struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - - list_for_each_entry(dp, &ovs_net->dps, list_node) { - struct flow_table *old_table = ovsl_dereference(dp->table); - struct flow_table *new_table; - - new_table = ovs_flow_tbl_rehash(old_table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(old_table, true); - } - } - } - rtnl_unlock(); - ovs_unlock(); - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); -} - static int __net_init ovs_init_net(struct net *net) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); @@ -2419,8 +1894,6 @@ static int __init dp_init(void) if (err < 0) goto error_unreg_notifier; - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); - return 0; error_unreg_notifier: @@ -2437,7 +1910,6 @@ error: static void dp_cleanup(void) { - cancel_delayed_work_sync(&rehash_flow_wq); dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); unregister_netdevice_notifier(&ovs_dp_device_notifier); unregister_pernet_device(&ovs_net_ops); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4d109c176ef3..d3d14a58aa91 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,6 +27,7 @@ #include <linux/u64_stats_sync.h> #include "flow.h" +#include "flow_table.h" #include "vport.h" #define DP_MAX_PORTS USHRT_MAX @@ -45,11 +46,15 @@ * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in * one of the datapath's queues). + * @n_mask_hit: Number of masks looked up for flow match. + * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked + * up per packet. */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; + u64 n_mask_hit; struct u64_stats_sync sync; }; @@ -57,7 +62,7 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @table: Current flow table. Protected by ovs_mutex and RCU. + * @table: flow table. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. @@ -71,7 +76,7 @@ struct datapath { struct list_head list_node; /* Flow table. */ - struct flow_table __rcu *table; + struct flow_table table; /* Switch ports. */ struct hlist_head *ports; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index c3235675f359..5c2dab276109 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work) continue; netdev_vport = netdev_vport_priv(vport); - if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || - netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)) dp_detach_port_notify(vport); } } @@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { + /* upper_dev_unlink and decrement promisc immediately */ + ovs_netdev_detach_dev(vport); + + /* schedule vport destroy, dev_put and genl notification */ ovs_net = net_generic(dev_net(dev), ovs_net_id); queue_work(system_wq, &ovs_net->dp_notify_work); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 410db90db73d..b409f5279601 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -45,202 +45,38 @@ #include <net/ipv6.h> #include <net/ndisc.h> -static struct kmem_cache *flow_cache; - -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val); - -static void update_range__(struct sw_flow_match *match, - size_t offset, size_t size, bool is_mask) +u64 ovs_flow_used_time(unsigned long flow_jiffies) { - struct sw_flow_key_range *range = NULL; - size_t start = rounddown(offset, sizeof(long)); - size_t end = roundup(offset + size, sizeof(long)); - - if (!is_mask) - range = &match->range; - else if (match->mask) - range = &match->mask->range; - - if (!range) - return; - - if (range->start == range->end) { - range->start = start; - range->end = end; - return; - } - - if (range->start > start) - range->start = start; + struct timespec cur_ts; + u64 cur_ms, idle_ms; - if (range->end < end) - range->end = end; -} + ktime_get_ts(&cur_ts); + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); + cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ts.tv_nsec / NSEC_PER_MSEC; -#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - sizeof((match)->key->field), is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - (match)->mask->key.field = value; \ - } else { \ - (match)->key->field = value; \ - } \ - } while (0) - -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ - } else { \ - memcpy(&(match)->key->field, value_p, len); \ - } \ - } while (0) - -static u16 range_n_bytes(const struct sw_flow_key_range *range) -{ - return range->end - range->start; + return cur_ms - idle_ms; } -void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, - struct sw_flow_mask *mask) -{ - memset(match, 0, sizeof(*match)); - match->key = key; - match->mask = mask; +#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) - memset(key, 0, sizeof(*key)); - - if (mask) { - memset(&mask->key, 0, sizeof(mask->key)); - mask->range.start = mask->range.end = 0; - } -} - -static bool ovs_match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) +void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) { - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; - u64 mask_allowed = key_attrs; /* At most allow all key attributes */ - - /* The following mask attributes allowed only if they - * pass the validation tests. */ - mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) - | (1 << OVS_KEY_ATTR_IPV6) - | (1 << OVS_KEY_ATTR_TCP) - | (1 << OVS_KEY_ATTR_UDP) - | (1 << OVS_KEY_ATTR_SCTP) - | (1 << OVS_KEY_ATTR_ICMP) - | (1 << OVS_KEY_ATTR_ICMPV6) - | (1 << OVS_KEY_ATTR_ARP) - | (1 << OVS_KEY_ATTR_ND)); - - /* Always allowed mask fields. */ - mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) - | (1 << OVS_KEY_ATTR_IN_PORT) - | (1 << OVS_KEY_ATTR_ETHERTYPE)); - - /* Check key attributes. */ - if (match->key->eth.type == htons(ETH_P_ARP) - || match->key->eth.type == htons(ETH_P_RARP)) { - key_expected |= 1 << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_ARP; - } + __be16 tcp_flags = 0; - if (match->key->eth.type == htons(ETH_P_IP)) { - key_expected |= 1 << OVS_KEY_ATTR_IPV4; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1 << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1 << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; - } - - if (match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1 << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMP) { - key_expected |= 1 << OVS_KEY_ATTR_ICMP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; - } - } - } - - if (match->key->eth.type == htons(ETH_P_IPV6)) { - key_expected |= 1 << OVS_KEY_ATTR_IPV6; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1 << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1 << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; - } - - if (match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1 << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMPV6) { - key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - - if (match->key->ipv6.tp.src == - htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { - key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_ND; - } - } - } - } - - if ((key_attrs & key_expected) != key_expected) { - /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); - return false; - } - - if ((mask_attrs & mask_allowed) != mask_attrs) { - /* Mask attributes check failed. */ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); - return false; + if ((flow->key.eth.type == htons(ETH_P_IP) || + flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.proto == IPPROTO_TCP && + likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { + tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - return true; + spin_lock(&flow->lock); + flow->used = jiffies; + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock(&flow->lock); } static int check_header(struct sk_buff *skb, int len) @@ -311,19 +147,6 @@ static bool icmphdr_ok(struct sk_buff *skb) sizeof(struct icmphdr)); } -u64 ovs_flow_used_time(unsigned long flow_jiffies) -{ - struct timespec cur_ts; - u64 cur_ms, idle_ms; - - ktime_get_ts(&cur_ts); - idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + - cur_ts.tv_nsec / NSEC_PER_MSEC; - - return cur_ms - idle_ms; -} - static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { unsigned int nh_ofs = skb_network_offset(skb); @@ -372,311 +195,6 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) -{ - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); - int i; - - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. - */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) - *d++ = *s++ & *m++; -} - -#define TCP_FLAGS_OFFSET 13 -#define TCP_FLAG_MASK 0x3f - -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) -{ - u8 tcp_flags = 0; - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - u8 *tcp = (u8 *)tcp_hdr(skb); - tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; - } - - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); -} - -struct sw_flow_actions *ovs_flow_actions_alloc(int size) -{ - struct sw_flow_actions *sfa; - - if (size > MAX_ACTIONS_BUFSIZE) - return ERR_PTR(-EINVAL); - - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); - if (!sfa) - return ERR_PTR(-ENOMEM); - - sfa->actions_len = 0; - return sfa; -} - -struct sw_flow *ovs_flow_alloc(void) -{ - struct sw_flow *flow; - - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&flow->lock); - flow->sf_acts = NULL; - flow->mask = NULL; - - return flow; -} - -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) -{ - hash = jhash_1word(hash, table->hash_seed); - return flex_array_get(table->buckets, - (hash & (table->n_buckets - 1))); -} - -static struct flex_array *alloc_buckets(unsigned int n_buckets) -{ - struct flex_array *buckets; - int i, err; - - buckets = flex_array_alloc(sizeof(struct hlist_head), - n_buckets, GFP_KERNEL); - if (!buckets) - return NULL; - - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); - if (err) { - flex_array_free(buckets); - return NULL; - } - - for (i = 0; i < n_buckets; i++) - INIT_HLIST_HEAD((struct hlist_head *) - flex_array_get(buckets, i)); - - return buckets; -} - -static void free_buckets(struct flex_array *buckets) -{ - flex_array_free(buckets); -} - -static struct flow_table *__flow_tbl_alloc(int new_size) -{ - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); - - if (!table) - return NULL; - - table->buckets = alloc_buckets(new_size); - - if (!table->buckets) { - kfree(table); - return NULL; - } - table->n_buckets = new_size; - table->count = 0; - table->node_ver = 0; - table->keep_flows = false; - get_random_bytes(&table->hash_seed, sizeof(u32)); - table->mask_list = NULL; - - return table; -} - -static void __flow_tbl_destroy(struct flow_table *table) -{ - int i; - - if (table->keep_flows) - goto skip_flows; - - for (i = 0; i < table->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *n; - int ver = table->node_ver; - - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del(&flow->hash_node[ver]); - ovs_flow_free(flow, false); - } - } - - BUG_ON(!list_empty(table->mask_list)); - kfree(table->mask_list); - -skip_flows: - free_buckets(table->buckets); - kfree(table); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size) -{ - struct flow_table *table = __flow_tbl_alloc(new_size); - - if (!table) - return NULL; - - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (!table->mask_list) { - table->keep_flows = true; - __flow_tbl_destroy(table); - return NULL; - } - INIT_LIST_HEAD(table->mask_list); - - return table; -} - -static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) -{ - struct flow_table *table = container_of(rcu, struct flow_table, rcu); - - __flow_tbl_destroy(table); -} - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) -{ - if (!table) - return; - - if (deferred) - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); - else - __flow_tbl_destroy(table); -} - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) -{ - struct sw_flow *flow; - struct hlist_head *head; - int ver; - int i; - - ver = table->node_ver; - while (*bucket < table->n_buckets) { - i = 0; - head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { - if (i < *last) { - i++; - continue; - } - *last = i + 1; - return flow; - } - (*bucket)++; - *last = 0; - } - - return NULL; -} - -static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) -{ - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - - table->count++; -} - -static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) -{ - int old_ver; - int i; - - old_ver = old->node_ver; - new->node_ver = !old_ver; - - /* Insert in new table. */ - for (i = 0; i < old->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head; - - head = flex_array_get(old->buckets, i); - - hlist_for_each_entry(flow, head, hash_node[old_ver]) - __tbl_insert(new, flow); - } - - new->mask_list = old->mask_list; - old->keep_flows = true; -} - -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) -{ - struct flow_table *new_table; - - new_table = __flow_tbl_alloc(n_buckets); - if (!new_table) - return ERR_PTR(-ENOMEM); - - flow_table_copy_flows(table, new_table); - - return new_table; -} - -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); -} - -static void __flow_free(struct sw_flow *flow) -{ - kfree((struct sf_flow_acts __force *)flow->sf_acts); - kmem_cache_free(flow_cache, flow); -} - -static void rcu_free_flow_callback(struct rcu_head *rcu) -{ - struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - - __flow_free(flow); -} - -void ovs_flow_free(struct sw_flow *flow, bool deferred) -{ - if (!flow) - return; - - ovs_sw_flow_mask_del_ref(flow->mask, deferred); - - if (deferred) - call_rcu(&flow->rcu, rcu_free_flow_callback); - else - __flow_free(flow); -} - -/* Schedules 'sf_acts' to be freed after the next RCU grace period. - * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) -{ - kfree_rcu(sf_acts, rcu); -} - static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) { struct qtag_prefix { @@ -910,6 +428,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) struct tcphdr *tcp = tcp_hdr(skb); key->ipv4.tp.src = tcp->source; key->ipv4.tp.dst = tcp->dest; + key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { @@ -978,6 +497,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) struct tcphdr *tcp = tcp_hdr(skb); key->ipv6.tp.src = tcp->source; key->ipv6.tp.dst = tcp->dest; + key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { @@ -1002,1080 +522,3 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) return 0; } - -static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) -{ - u32 *hash_key = (u32 *)((u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; - - /* Make sure number of hash bytes are multiple of u32. */ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); - - return jhash2(hash_key, hash_u32s, 0); -} - -static int flow_key_start(const struct sw_flow_key *key) -{ - if (key->tun_key.ipv4_dst) - return 0; - else - return rounddown(offsetof(struct sw_flow_key, phy), - sizeof(long)); -} - -static bool __cmp_key(const struct sw_flow_key *key1, - const struct sw_flow_key *key2, int key_start, int key_end) -{ - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); - long diffs = 0; - int i; - - for (i = key_start; i < key_end; i += sizeof(long)) - diffs |= *cp1++ ^ *cp2++; - - return diffs == 0; -} - -static bool __flow_cmp_masked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->key, key, key_start, key_end); -} - -static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->unmasked_key, key, key_start, key_end); -} - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end) -{ - int key_start; - key_start = flow_key_start(key); - - return __flow_cmp_unmasked_key(flow, key, key_start, key_end); - -} - -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct sw_flow_match *match) -{ - struct sw_flow_key *unmasked = match->key; - int key_end = match->range.end; - struct sw_flow *flow; - - flow = ovs_flow_lookup(table, unmasked); - if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) - flow = NULL; - - return flow; -} - -static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, - const struct sw_flow_key *unmasked, - struct sw_flow_mask *mask) -{ - struct sw_flow *flow; - struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; - u32 hash; - struct sw_flow_key masked_key; - - ovs_flow_key_mask(&masked_key, unmasked, mask); - hash = ovs_flow_hash(&masked_key, key_start, key_end); - head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { - if (flow->mask == mask && - __flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) - return flow; - } - return NULL; -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - struct sw_flow *flow = NULL; - struct sw_flow_mask *mask; - - list_for_each_entry_rcu(mask, tbl->mask_list, list) { - flow = ovs_masked_flow_lookup(tbl, key, mask); - if (flow) /* Found */ - break; - } - - return flow; -} - - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) -{ - flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); - __tbl_insert(table, flow); -} - -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) -{ - BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); - table->count--; -} - -/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_TUNNEL] = -1, -}; - -static bool is_all_zero(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i]) - return false; - - return true; -} - -static int __parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], - u64 *attrsp, bool nz) -{ - const struct nlattr *nla; - u32 attrs; - int rem; - - attrs = *attrsp; - nla_for_each_nested(nla, attr, rem) { - u16 type = nla_type(nla); - int expected_len; - - if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", - type, OVS_KEY_ATTR_MAX); - return -EINVAL; - } - - if (attrs & (1 << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); - return -EINVAL; - } - - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); - return -EINVAL; - } - - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { - attrs |= 1 << type; - a[type] = nla; - } - } - if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); - return -EINVAL; - } - - *attrsp = attrs; - return 0; -} - -static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, true); -} - -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, false); -} - -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) -{ - struct nlattr *a; - int rem; - bool ttl = false; - __be16 tun_flags = 0; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - }; - - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); - return -EINVAL; - } - - if (ovs_tunnel_key_lens[type] != nla_len(a)) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", - type, nla_len(a), ovs_tunnel_key_lens[type]); - return -EINVAL; - } - - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: - SW_FLOW_KEY_PUT(match, tun_key.tun_id, - nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, - nla_get_u8(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, - nla_get_u8(a), is_mask); - ttl = true; - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; - break; - default: - return -EINVAL; - } - } - - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); - - if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); - return -EINVAL; - } - - if (!is_mask) { - if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); - return -EINVAL; - } - - if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); - return -EINVAL; - } - } - - return 0; -} - -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output) -{ - struct nlattr *nla; - - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); - if (!nla) - return -EMSGSIZE; - - if (output->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) - return -EMSGSIZE; - if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) - return -EMSGSIZE; - if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) - return -EMSGSIZE; - if (output->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) - return -EMSGSIZE; - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) - return -EMSGSIZE; - - nla_nest_end(skb, nla); - return 0; -} - -static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) -{ - if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { - SW_FLOW_KEY_PUT(match, phy.priority, - nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); - } - - if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { - u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); - - if (is_mask) - in_port = 0xffffffff; /* Always exact match in_port. */ - else if (in_port >= DP_MAX_PORTS) - return -EINVAL; - - SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); - } - - if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { - uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); - - SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); - } - if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { - if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); - } - return 0; -} - -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) -{ - int err; - u64 orig_attrs = attrs; - - err = metadata_from_nlattrs(match, &attrs, a, is_mask); - if (err) - return err; - - if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { - const struct ovs_key_ethernet *eth_key; - - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); - SW_FLOW_KEY_MEMCPY(match, eth.src, - eth_key->eth_src, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, eth.dst, - eth_key->eth_dst, ETH_ALEN, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - } - - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); - else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); - } else if (!is_mask) - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); - - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); - } - - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { - const struct ovs_key_ipv4 *ipv4_key; - - ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); - if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ip.proto, - ipv4_key->ipv4_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv4_key->ipv4_tos, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv4_key->ipv4_ttl, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - ipv4_key->ipv4_frag, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - ipv4_key->ipv4_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - ipv4_key->ipv4_dst, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - } - - if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { - const struct ovs_key_ipv6 *ipv6_key; - - ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); - if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ipv6.label, - ipv6_key->ipv6_label, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ipv6_key->ipv6_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv6_key->ipv6_tclass, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv6_key->ipv6_hlimit, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - ipv6_key->ipv6_frag, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, - ipv6_key->ipv6_src, - sizeof(match->key->ipv6.addr.src), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, - ipv6_key->ipv6_dst, - sizeof(match->key->ipv6.addr.dst), - is_mask); - - attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - } - - if (attrs & (1 << OVS_KEY_ATTR_ARP)) { - const struct ovs_key_arp *arp_key; - - arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); - if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", - arp_key->arp_op); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - arp_key->arp_sip, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - arp_key->arp_tip, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ntohs(arp_key->arp_op), is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, - arp_key->arp_sha, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, - arp_key->arp_tha, ETH_ALEN, is_mask); - - attrs &= ~(1 << OVS_KEY_ATTR_ARP); - } - - if (attrs & (1 << OVS_KEY_ATTR_TCP)) { - const struct ovs_key_tcp *tcp_key; - - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_TCP); - } - - if (attrs & (1 << OVS_KEY_ATTR_UDP)) { - const struct ovs_key_udp *udp_key; - - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_UDP); - } - - if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { - const struct ovs_key_sctp *sctp_key; - - sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_SCTP); - } - - if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { - const struct ovs_key_icmp *icmp_key; - - icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - htons(icmp_key->icmp_code), is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ICMP); - } - - if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { - const struct ovs_key_icmpv6 *icmpv6_key; - - icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - htons(icmpv6_key->icmpv6_code), is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); - } - - if (attrs & (1 << OVS_KEY_ATTR_ND)) { - const struct ovs_key_nd *nd_key; - - nd_key = nla_data(a[OVS_KEY_ATTR_ND]); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, - nd_key->nd_target, - sizeof(match->key->ipv6.nd.target), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, - nd_key->nd_sll, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, - nd_key->nd_tll, ETH_ALEN, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ND); - } - - if (attrs != 0) - return -EINVAL; - - return 0; -} - -/** - * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and - * mask. In case the 'mask' is NULL, the flow is treated as exact match - * flow. Otherwise, it is treated as a wildcarded flow, except the mask - * does not include any don't care bit. - * @match: receives the extracted flow match information. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. The fields should of the packet that triggered the creation - * of this flow. - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink - * attribute specifies the mask field of the wildcarded flow. - */ -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *key, - const struct nlattr *mask) -{ - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; - u64 key_attrs = 0; - u64 mask_attrs = 0; - bool encap_valid = false; - int err; - - err = parse_flow_nlattrs(key, a, &key_attrs); - if (err) - return err; - - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); - return -EINVAL; - } - - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); - return -EINVAL; - } - } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, key_attrs, a, false); - if (err) - return err; - - if (mask) { - err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); - if (err) - return err; - - if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); - return -EINVAL; - } - - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); - } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); - return -EINVAL; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); - if (err) - return err; - } else { - /* Populate exact match flow's key mask. */ - if (match->mask) - ovs_sw_flow_mask_set(match->mask, &match->range, 0xff); - } - - if (!ovs_match_validate(match, key_attrs, mask_attrs)) - return -EINVAL; - - return 0; -} - -/** - * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @flow: Receives extracted in_port, priority, tun_key and skb_mark. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. - * - * This parses a series of Netlink attributes that form a flow key, which must - * take the same form accepted by flow_from_nlattrs(), but only enough of it to - * get the metadata, that is, the parts of the flow key that cannot be - * extracted from the packet itself. - */ - -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr) -{ - struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - u64 attrs = 0; - int err; - struct sw_flow_match match; - - flow->key.phy.in_port = DP_MAX_PORTS; - flow->key.phy.priority = 0; - flow->key.phy.skb_mark = 0; - memset(tun_key, 0, sizeof(flow->key.tun_key)); - - err = parse_flow_nlattrs(attr, a, &attrs); - if (err) - return -EINVAL; - - memset(&match, 0, sizeof(match)); - match.key = &flow->key; - - err = metadata_from_nlattrs(&match, &attrs, a, false); - if (err) - return err; - - return 0; -} - -int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) -{ - struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; - bool is_mask = (swkey != output); - - if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) - goto nla_put_failure; - - if ((swkey->tun_key.ipv4_dst || is_mask) && - ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) - goto nla_put_failure; - - if (swkey->phy.in_port == DP_MAX_PORTS) { - if (is_mask && (output->phy.in_port == 0xffff)) - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) - goto nla_put_failure; - } else { - u16 upper_u16; - upper_u16 = !is_mask ? 0 : 0xffff; - - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, - (upper_u16 << 16) | output->phy.in_port)) - goto nla_put_failure; - } - - if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) - goto nla_put_failure; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); - - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) - goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) - goto unencap; - } else - encap = NULL; - - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. Ethertype can also - * be wildcarded. - */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; - } - - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) - goto nla_put_failure; - - if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ipv4 *ipv4_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); - if (!nla) - goto nla_put_failure; - ipv4_key = nla_data(nla); - ipv4_key->ipv4_src = output->ipv4.addr.src; - ipv4_key->ipv4_dst = output->ipv4.addr.dst; - ipv4_key->ipv4_proto = output->ip.proto; - ipv4_key->ipv4_tos = output->ip.tos; - ipv4_key->ipv4_ttl = output->ip.ttl; - ipv4_key->ipv4_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ipv6 *ipv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); - if (!nla) - goto nla_put_failure; - ipv6_key = nla_data(nla); - memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, - sizeof(ipv6_key->ipv6_src)); - memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, - sizeof(ipv6_key->ipv6_dst)); - ipv6_key->ipv6_label = output->ipv6.label; - ipv6_key->ipv6_proto = output->ip.proto; - ipv6_key->ipv6_tclass = output->ip.tos; - ipv6_key->ipv6_hlimit = output->ip.ttl; - ipv6_key->ipv6_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP) || - swkey->eth.type == htons(ETH_P_RARP)) { - struct ovs_key_arp *arp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); - if (!nla) - goto nla_put_failure; - arp_key = nla_data(nla); - memset(arp_key, 0, sizeof(struct ovs_key_arp)); - arp_key->arp_sip = output->ipv4.addr.src; - arp_key->arp_tip = output->ipv4.addr.dst; - arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); - } - - if ((swkey->eth.type == htons(ETH_P_IP) || - swkey->eth.type == htons(ETH_P_IPV6)) && - swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - - if (swkey->ip.proto == IPPROTO_TCP) { - struct ovs_key_tcp *tcp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); - if (!nla) - goto nla_put_failure; - tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_UDP) { - struct ovs_key_udp *udp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); - if (!nla) - goto nla_put_failure; - udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_SCTP) { - struct ovs_key_sctp *sctp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); - if (!nla) - goto nla_put_failure; - sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; - } - } else if (swkey->eth.type == htons(ETH_P_IP) && - swkey->ip.proto == IPPROTO_ICMP) { - struct ovs_key_icmp *icmp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); - if (!nla) - goto nla_put_failure; - icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); - } else if (swkey->eth.type == htons(ETH_P_IPV6) && - swkey->ip.proto == IPPROTO_ICMPV6) { - struct ovs_key_icmpv6 *icmpv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, - sizeof(*icmpv6_key)); - if (!nla) - goto nla_put_failure; - icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); - - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { - struct ovs_key_nd *nd_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); - if (!nla) - goto nla_put_failure; - nd_key = nla_data(nla); - memcpy(nd_key->nd_target, &output->ipv6.nd.target, - sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); - } - } - } - -unencap: - if (encap) - nla_nest_end(skb, encap); - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -/* Initializes the flow module. - * Returns zero if successful or a negative error code. */ -int ovs_flow_init(void) -{ - BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); - BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); - if (flow_cache == NULL) - return -ENOMEM; - - return 0; -} - -/* Uninitializes the flow module. */ -void ovs_flow_exit(void) -{ - kmem_cache_destroy(flow_cache); -} - -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) -{ - struct sw_flow_mask *mask; - - mask = kmalloc(sizeof(*mask), GFP_KERNEL); - if (mask) - mask->ref_count = 0; - - return mask; -} - -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) -{ - mask->ref_count++; -} - -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) -{ - if (!mask) - return; - - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } -} - -static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, - const struct sw_flow_mask *b) -{ - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; - - return (a->range.end == b->range.end) - && (a->range.start == b->range.start) - && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); -} - -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, - const struct sw_flow_mask *mask) -{ - struct list_head *ml; - - list_for_each(ml, tbl->mask_list) { - struct sw_flow_mask *m; - m = container_of(ml, struct sw_flow_mask, list); - if (ovs_sw_flow_mask_equal(mask, m)) - return m; - } - - return NULL; -} - -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ -void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) -{ - list_add_rcu(&mask->list, tbl->mask_list); -} - -/** - * Set 'range' fields in the mask to the value of 'val'. - */ -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val) -{ - u8 *m = (u8 *)&mask->key + range->start; - - mask->range = *range; - memset(m, val, range_n_bytes(range)); -} diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 212fbf7510c4..1510f51dbf74 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -33,14 +33,6 @@ #include <net/inet_ecn.h> struct sk_buff; -struct sw_flow_mask; -struct flow_table; - -struct sw_flow_actions { - struct rcu_head rcu; - u32 actions_len; - struct nlattr actions[]; -}; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ @@ -101,6 +93,7 @@ struct sw_flow_key { struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ } tp; struct { u8 sha[ETH_ALEN]; /* ARP source hardware address. */ @@ -117,6 +110,7 @@ struct sw_flow_key { struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ } tp; struct { struct in6_addr target; /* ND target address. */ @@ -127,6 +121,31 @@ struct sw_flow_key { }; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ +struct sw_flow_key_range { + size_t start; + size_t end; +}; + +struct sw_flow_mask { + int ref_count; + struct rcu_head rcu; + struct list_head list; + struct sw_flow_key_range range; + struct sw_flow_key key; +}; + +struct sw_flow_match { + struct sw_flow_key *key; + struct sw_flow_key_range range; + struct sw_flow_mask *mask; +}; + +struct sw_flow_actions { + struct rcu_head rcu; + u32 actions_len; + struct nlattr actions[]; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -141,23 +160,9 @@ struct sw_flow { unsigned long used; /* Last used time (in jiffies). */ u64 packet_count; /* Number of packets matched. */ u64 byte_count; /* Number of bytes matched. */ - u8 tcp_flags; /* Union of seen TCP flags. */ -}; - -struct sw_flow_key_range { - size_t start; - size_t end; + __be16 tcp_flags; /* Union of seen TCP flags. */ }; -struct sw_flow_match { - struct sw_flow_key *key; - struct sw_flow_key_range range; - struct sw_flow_mask *mask; -}; - -void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); - struct arp_eth_header { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ @@ -172,88 +177,9 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -int ovs_flow_init(void); -void ovs_flow_exit(void); - -struct sw_flow *ovs_flow_alloc(void); -void ovs_flow_deferred_free(struct sw_flow *); -void ovs_flow_free(struct sw_flow *, bool deferred); - -struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); -void ovs_flow_deferred_free_acts(struct sw_flow_actions *); - -int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); -int ovs_flow_to_nlattrs(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (32 * 1024) -#define TBL_MIN_BUCKETS 1024 - -struct flow_table { - struct flex_array *buckets; - unsigned int count, n_buckets; - struct rcu_head rcu; - struct list_head *mask_list; - int node_ver; - u32 hash_seed; - bool keep_flows; -}; - -static inline int ovs_flow_tbl_count(struct flow_table *table) -{ - return table->count; -} - -static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) -{ - return (table->count > table->n_buckets); -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *, - const struct sw_flow_key *); -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct sw_flow_match *match); - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); -struct flow_table *ovs_flow_tbl_alloc(int new_size); -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); -extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask); -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output); - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end); - -struct sw_flow_mask { - int ref_count; - struct rcu_head rcu; - struct list_head list; - struct sw_flow_key_range range; - struct sw_flow_key key; -}; +int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); -void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, - const struct sw_flow_mask *); -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); #endif /* flow.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c new file mode 100644 index 000000000000..2bc1bc1aca3b --- /dev/null +++ b/net/openvswitch/flow_netlink.c @@ -0,0 +1,1630 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include <linux/uaccess.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/sctp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/rculist.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ndisc.h> + +#include "flow_netlink.h" + +static void update_range__(struct sw_flow_match *match, + size_t offset, size_t size, bool is_mask) +{ + struct sw_flow_key_range *range = NULL; + size_t start = rounddown(offset, sizeof(long)); + size_t end = roundup(offset + size, sizeof(long)); + + if (!is_mask) + range = &match->range; + else if (match->mask) + range = &match->mask->range; + + if (!range) + return; + + if (range->start == range->end) { + range->start = start; + range->end = end; + return; + } + + if (range->start > start) + range->start = start; + + if (range->end < end) + range->end = end; +} + +#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + (match)->mask->key.field = value; \ + } else { \ + (match)->key->field = value; \ + } \ + } while (0) + +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + len, is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + memcpy(&(match)->mask->key.field, value_p, len);\ + } else { \ + memcpy(&(match)->key->field, value_p, len); \ + } \ + } while (0) + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +static bool match_validate(const struct sw_flow_match *match, + u64 key_attrs, u64 mask_attrs) +{ + u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; + u64 mask_allowed = key_attrs; /* At most allow all key attributes */ + + /* The following mask attributes allowed only if they + * pass the validation tests. */ + mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) + | (1 << OVS_KEY_ATTR_IPV6) + | (1 << OVS_KEY_ATTR_TCP) + | (1 << OVS_KEY_ATTR_TCP_FLAGS) + | (1 << OVS_KEY_ATTR_UDP) + | (1 << OVS_KEY_ATTR_SCTP) + | (1 << OVS_KEY_ATTR_ICMP) + | (1 << OVS_KEY_ATTR_ICMPV6) + | (1 << OVS_KEY_ATTR_ARP) + | (1 << OVS_KEY_ATTR_ND)); + + /* Always allowed mask fields. */ + mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) + | (1 << OVS_KEY_ATTR_IN_PORT) + | (1 << OVS_KEY_ATTR_ETHERTYPE)); + + /* Check key attributes. */ + if (match->key->eth.type == htons(ETH_P_ARP) + || match->key->eth.type == htons(ETH_P_RARP)) { + key_expected |= 1 << OVS_KEY_ATTR_ARP; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_ARP; + } + + if (match->key->eth.type == htons(ETH_P_IP)) { + key_expected |= 1 << OVS_KEY_ATTR_IPV4; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1 << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + + if (match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1 << OVS_KEY_ATTR_TCP; + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + if (match->mask && (match->mask->key.ip.proto == 0xff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + } + } + + if (match->key->ip.proto == IPPROTO_ICMP) { + key_expected |= 1 << OVS_KEY_ATTR_ICMP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; + } + } + } + + if (match->key->eth.type == htons(ETH_P_IPV6)) { + key_expected |= 1 << OVS_KEY_ATTR_IPV6; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1 << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + + if (match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1 << OVS_KEY_ATTR_TCP; + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + if (match->mask && (match->mask->key.ip.proto == 0xff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + } + } + + if (match->key->ip.proto == IPPROTO_ICMPV6) { + key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; + + if (match->key->ipv6.tp.src == + htons(NDISC_NEIGHBOUR_SOLICITATION) || + match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + key_expected |= 1 << OVS_KEY_ATTR_ND; + if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_ND; + } + } + } + } + + if ((key_attrs & key_expected) != key_expected) { + /* Key attributes check failed. */ + OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", + key_attrs, key_expected); + return false; + } + + if ((mask_attrs & mask_allowed) != mask_attrs) { + /* Mask attributes check failed. */ + OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", + mask_attrs, mask_allowed); + return false; + } + + return true; +} + +/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ +static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = -1, + [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), + [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), + [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), + [OVS_KEY_ATTR_VLAN] = sizeof(__be16), + [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), + [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), + [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), + [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), + [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), + [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), + [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), + [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), + [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), + [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, +}; + +static bool is_all_zero(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i]) + return false; + + return true; +} + +static int __parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], + u64 *attrsp, bool nz) +{ + const struct nlattr *nla; + u64 attrs; + int rem; + + attrs = *attrsp; + nla_for_each_nested(nla, attr, rem) { + u16 type = nla_type(nla); + int expected_len; + + if (type > OVS_KEY_ATTR_MAX) { + OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + type, OVS_KEY_ATTR_MAX); + return -EINVAL; + } + + if (attrs & (1 << type)) { + OVS_NLERR("Duplicate key attribute (type %d).\n", type); + return -EINVAL; + } + + expected_len = ovs_key_lens[type]; + if (nla_len(nla) != expected_len && expected_len != -1) { + OVS_NLERR("Key attribute has unexpected length (type=%d" + ", length=%d, expected=%d).\n", type, + nla_len(nla), expected_len); + return -EINVAL; + } + + if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + attrs |= 1 << type; + a[type] = nla; + } + } + if (rem) { + OVS_NLERR("Message has %d unknown bytes.\n", rem); + return -EINVAL; + } + + *attrsp = attrs; + return 0; +} + +static int parse_flow_mask_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, true); +} + +static int parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, false); +} + +static int ipv4_tun_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask) +{ + struct nlattr *a; + int rem; + bool ttl = false; + __be16 tun_flags = 0; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX) { + OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", + type, OVS_TUNNEL_KEY_ATTR_MAX); + return -EINVAL; + } + + if (ovs_tunnel_key_lens[type] != nla_len(a)) { + OVS_NLERR("IPv4 tunnel attribute type has unexpected " + " length (type=%d, length=%d, expected=%d).\n", + type, nla_len(a), ovs_tunnel_key_lens[type]); + return -EINVAL; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + SW_FLOW_KEY_PUT(match, tun_key.tun_id, + nla_get_be64(a), is_mask); + tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, + nla_get_u8(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, + nla_get_u8(a), is_mask); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + } + } + + SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + + if (rem > 0) { + OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + return -EINVAL; + } + + if (!is_mask) { + if (!match->key->tun_key.ipv4_dst) { + OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + return -EINVAL; + } + + if (!ttl) { + OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + return -EINVAL; + } + } + + return 0; +} + +static int ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key, + const struct ovs_key_ipv4_tunnel *output) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (output->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) + return -EMSGSIZE; + if (output->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + return -EMSGSIZE; + if (output->ipv4_dst && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + return -EMSGSIZE; + if (output->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + + +static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, + const struct nlattr **a, bool is_mask) +{ + if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { + SW_FLOW_KEY_PUT(match, phy.priority, + nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); + } + + if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { + u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); + + if (is_mask) + in_port = 0xffffffff; /* Always exact match in_port. */ + else if (in_port >= DP_MAX_PORTS) + return -EINVAL; + + SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); + } + + if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { + uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); + + SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); + } + if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, + is_mask)) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + return 0; +} + +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) +{ + int err; + u64 orig_attrs = attrs; + + err = metadata_from_nlattrs(match, &attrs, a, is_mask); + if (err) + return err; + + if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { + const struct ovs_key_ethernet *eth_key; + + eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); + SW_FLOW_KEY_MEMCPY(match, eth.src, + eth_key->eth_src, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, eth.dst, + eth_key->eth_dst, ETH_ALEN, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); + } + + if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { + __be16 tci; + + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (is_mask) + OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + else + OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + } else if (!is_mask) + SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { + OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { + const struct ovs_key_ipv4 *ipv4_key; + + ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); + if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ip.proto, + ipv4_key->ipv4_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv4_key->ipv4_tos, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv4_key->ipv4_ttl, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv4_key->ipv4_frag, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + ipv4_key->ipv4_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + ipv4_key->ipv4_dst, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + } + + if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { + const struct ovs_key_ipv6 *ipv6_key; + + ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); + if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, + ipv6_key->ipv6_label, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ipv6_key->ipv6_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv6_key->ipv6_tclass, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv6_key->ipv6_hlimit, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv6_key->ipv6_frag, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, + ipv6_key->ipv6_src, + sizeof(match->key->ipv6.addr.src), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, + ipv6_key->ipv6_dst, + sizeof(match->key->ipv6.addr.dst), + is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + } + + if (attrs & (1 << OVS_KEY_ATTR_ARP)) { + const struct ovs_key_arp *arp_key; + + arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); + if (!is_mask && (arp_key->arp_op & htons(0xff00))) { + OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + arp_key->arp_op); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + arp_key->arp_sip, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + arp_key->arp_tip, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ntohs(arp_key->arp_op), is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, + arp_key->arp_sha, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, + arp_key->arp_tha, ETH_ALEN, is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_ARP); + } + + if (attrs & (1 << OVS_KEY_ATTR_TCP)) { + const struct ovs_key_tcp *tcp_key; + + tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + tcp_key->tcp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + tcp_key->tcp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_TCP); + } + + if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), + is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), + is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); + } + + if (attrs & (1 << OVS_KEY_ATTR_UDP)) { + const struct ovs_key_udp *udp_key; + + udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + udp_key->udp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + udp_key->udp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_UDP); + } + + if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { + const struct ovs_key_sctp *sctp_key; + + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + sctp_key->sctp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + sctp_key->sctp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_SCTP); + } + + if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { + const struct ovs_key_icmp *icmp_key; + + icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + htons(icmp_key->icmp_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + htons(icmp_key->icmp_code), is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ICMP); + } + + if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { + const struct ovs_key_icmpv6 *icmpv6_key; + + icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + htons(icmpv6_key->icmpv6_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + htons(icmpv6_key->icmpv6_code), is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); + } + + if (attrs & (1 << OVS_KEY_ATTR_ND)) { + const struct ovs_key_nd *nd_key; + + nd_key = nla_data(a[OVS_KEY_ATTR_ND]); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, + nd_key->nd_target, + sizeof(match->key->ipv6.nd.target), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, + nd_key->nd_sll, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, + nd_key->nd_tll, ETH_ALEN, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ND); + } + + if (attrs != 0) + return -EINVAL; + + return 0; +} + +static void sw_flow_mask_set(struct sw_flow_mask *mask, + struct sw_flow_key_range *range, u8 val) +{ + u8 *m = (u8 *)&mask->key + range->start; + + mask->range = *range; + memset(m, val, range_n_bytes(range)); +} + +/** + * ovs_nla_get_match - parses Netlink attributes into a flow key and + * mask. In case the 'mask' is NULL, the flow is treated as exact match + * flow. Otherwise, it is treated as a wildcarded flow, except the mask + * does not include any don't care bit. + * @match: receives the extracted flow match information. + * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. The fields should of the packet that triggered the creation + * of this flow. + * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink + * attribute specifies the mask field of the wildcarded flow. + */ +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *key, + const struct nlattr *mask) +{ + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + const struct nlattr *encap; + u64 key_attrs = 0; + u64 mask_attrs = 0; + bool encap_valid = false; + int err; + + err = parse_flow_nlattrs(key, a, &key_attrs); + if (err) + return err; + + if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && + (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { + __be16 tci; + + if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR("Invalid Vlan frame.\n"); + return -EINVAL; + } + + key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + encap = a[OVS_KEY_ATTR_ENCAP]; + key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + encap_valid = true; + + if (tci & htons(VLAN_TAG_PRESENT)) { + err = parse_flow_nlattrs(encap, a, &key_attrs); + if (err) + return err; + } else if (!tci) { + /* Corner case for truncated 802.1Q header. */ + if (nla_len(encap)) { + OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + return -EINVAL; + } + } else { + OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, key_attrs, a, false); + if (err) + return err; + + if (mask) { + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); + if (err) + return err; + + if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { + __be16 eth_type = 0; + __be16 tci = 0; + + if (!encap_valid) { + OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + return -EINVAL; + } + + mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + if (a[OVS_KEY_ATTR_ETHERTYPE]) + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (eth_type == htons(0xffff)) { + mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + encap = a[OVS_KEY_ATTR_ENCAP]; + err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + } else { + OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", + ntohs(eth_type)); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + if (err) + return err; + } else { + /* Populate exact match flow's key mask. */ + if (match->mask) + sw_flow_mask_set(match->mask, &match->range, 0xff); + } + + if (!match_validate(match, key_attrs, mask_attrs)) + return -EINVAL; + + return 0; +} + +/** + * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. + * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ + +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr) +{ + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; + int err; + struct sw_flow_match match; + + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); + + err = parse_flow_nlattrs(attr, a, &attrs); + if (err) + return -EINVAL; + + memset(&match, 0, sizeof(match)); + match.key = &flow->key; + + err = metadata_from_nlattrs(&match, &attrs, a, false); + if (err) + return err; + + return 0; +} + +int ovs_nla_put_flow(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) +{ + struct ovs_key_ethernet *eth_key; + struct nlattr *nla, *encap; + bool is_mask = (swkey != output); + + if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) + goto nla_put_failure; + + if ((swkey->tun_key.ipv4_dst || is_mask) && + ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) + goto nla_put_failure; + + if (swkey->phy.in_port == DP_MAX_PORTS) { + if (is_mask && (output->phy.in_port == 0xffff)) + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) + goto nla_put_failure; + } else { + u16 upper_u16; + upper_u16 = !is_mask ? 0 : 0xffff; + + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, + (upper_u16 << 16) | output->phy.in_port)) + goto nla_put_failure; + } + + if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) + goto nla_put_failure; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) + goto nla_put_failure; + + eth_key = nla_data(nla); + memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); + memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { + __be16 eth_type; + eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + goto nla_put_failure; + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.tci) + goto unencap; + } else + encap = NULL; + + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. + */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) + goto nla_put_failure; + + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ipv4 *ipv4_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); + if (!nla) + goto nla_put_failure; + ipv4_key = nla_data(nla); + ipv4_key->ipv4_src = output->ipv4.addr.src; + ipv4_key->ipv4_dst = output->ipv4.addr.dst; + ipv4_key->ipv4_proto = output->ip.proto; + ipv4_key->ipv4_tos = output->ip.tos; + ipv4_key->ipv4_ttl = output->ip.ttl; + ipv4_key->ipv4_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ipv6 *ipv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); + if (!nla) + goto nla_put_failure; + ipv6_key = nla_data(nla); + memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, + sizeof(ipv6_key->ipv6_src)); + memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, + sizeof(ipv6_key->ipv6_dst)); + ipv6_key->ipv6_label = output->ipv6.label; + ipv6_key->ipv6_proto = output->ip.proto; + ipv6_key->ipv6_tclass = output->ip.tos; + ipv6_key->ipv6_hlimit = output->ip.ttl; + ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { + struct ovs_key_arp *arp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); + if (!nla) + goto nla_put_failure; + arp_key = nla_data(nla); + memset(arp_key, 0, sizeof(struct ovs_key_arp)); + arp_key->arp_sip = output->ipv4.addr.src; + arp_key->arp_tip = output->ipv4.addr.dst; + arp_key->arp_op = htons(output->ip.proto); + memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); + memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + } + + if ((swkey->eth.type == htons(ETH_P_IP) || + swkey->eth.type == htons(ETH_P_IPV6)) && + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + + if (swkey->ip.proto == IPPROTO_TCP) { + struct ovs_key_tcp *tcp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); + if (!nla) + goto nla_put_failure; + tcp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + tcp_key->tcp_src = output->ipv4.tp.src; + tcp_key->tcp_dst = output->ipv4.tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->ipv4.tp.flags)) + goto nla_put_failure; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + tcp_key->tcp_src = output->ipv6.tp.src; + tcp_key->tcp_dst = output->ipv6.tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->ipv6.tp.flags)) + goto nla_put_failure; + } + } else if (swkey->ip.proto == IPPROTO_UDP) { + struct ovs_key_udp *udp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); + if (!nla) + goto nla_put_failure; + udp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + udp_key->udp_src = output->ipv4.tp.src; + udp_key->udp_dst = output->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + udp_key->udp_src = output->ipv6.tp.src; + udp_key->udp_dst = output->ipv6.tp.dst; + } + } else if (swkey->ip.proto == IPPROTO_SCTP) { + struct ovs_key_sctp *sctp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); + if (!nla) + goto nla_put_failure; + sctp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + sctp_key->sctp_src = swkey->ipv4.tp.src; + sctp_key->sctp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + sctp_key->sctp_src = swkey->ipv6.tp.src; + sctp_key->sctp_dst = swkey->ipv6.tp.dst; + } + } else if (swkey->eth.type == htons(ETH_P_IP) && + swkey->ip.proto == IPPROTO_ICMP) { + struct ovs_key_icmp *icmp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); + if (!nla) + goto nla_put_failure; + icmp_key = nla_data(nla); + icmp_key->icmp_type = ntohs(output->ipv4.tp.src); + icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + } else if (swkey->eth.type == htons(ETH_P_IPV6) && + swkey->ip.proto == IPPROTO_ICMPV6) { + struct ovs_key_icmpv6 *icmpv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, + sizeof(*icmpv6_key)); + if (!nla) + goto nla_put_failure; + icmpv6_key = nla_data(nla); + icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); + icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + + if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || + icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + struct ovs_key_nd *nd_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); + if (!nla) + goto nla_put_failure; + nd_key = nla_data(nla); + memcpy(nd_key->nd_target, &output->ipv6.nd.target, + sizeof(nd_key->nd_target)); + memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); + memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + } + } + } + +unencap: + if (encap) + nla_nest_end(skb, encap); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +#define MAX_ACTIONS_BUFSIZE (32 * 1024) + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) +{ + struct sw_flow_actions *sfa; + + if (size > MAX_ACTIONS_BUFSIZE) + return ERR_PTR(-EINVAL); + + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + + sfa->actions_len = 0; + return sfa; +} + +/* RCU callback used by ovs_nla_free_flow_actions. */ +static void rcu_free_acts_callback(struct rcu_head *rcu) +{ + struct sw_flow_actions *sf_acts = container_of(rcu, + struct sw_flow_actions, rcu); + kfree(sf_acts); +} + +/* Schedules 'sf_acts' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +} + +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_nla_alloc_flow_actions(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, + int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} + +static inline void add_nested_action_end(struct sw_flow_actions *sfa, + int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa) +{ + const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; + const struct nlattr *probability, *actions; + const struct nlattr *a; + int rem, start, err, st_acts; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) + return -EINVAL; + attrs[type] = a; + } + if (rem) + return -EINVAL; + + probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; + if (!probability || nla_len(probability) != sizeof(u32)) + return -EINVAL; + + actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; + + /* validation done, copy sample action. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, + nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; +} + +static int validate_tp_port(const struct sw_flow_key *flow_key) +{ + if (flow_key->eth.type == htons(ETH_P_IP)) { + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) + return 0; + } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) + return 0; + } + + return -EINVAL; +} + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, + struct sw_flow_mask *mask) +{ + memset(match, 0, sizeof(*match)); + match->key = key; + match->mask = mask; + + memset(key, 0, sizeof(*key)); + + if (mask) { + memset(&mask->key, 0, sizeof(mask->key)); + mask->range.start = mask->range.end = 0; + } +} + +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int err, start; + + ovs_match_init(&match, &key, NULL); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, + sizeof(match.key->tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + +static int validate_set(const struct nlattr *a, + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + + /* There can be only one key in a action */ + if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + return -EINVAL; + + if (key_type > OVS_KEY_ATTR_MAX || + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) + return -EINVAL; + + switch (key_type) { + const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; + int err; + + case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_ETHERNET: + break; + + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + + case OVS_KEY_ATTR_IPV4: + if (flow_key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv4_key = nla_data(ovs_key); + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_TCP: + if (flow_key->ip.proto != IPPROTO_TCP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_UDP: + if (flow_key->ip.proto != IPPROTO_UDP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_SCTP: + if (flow_key->ip.proto != IPPROTO_SCTP) + return -EINVAL; + + return validate_tp_port(flow_key); + + default: + return -EINVAL; + } + + return 0; +} + +static int validate_userspace(const struct nlattr *attr) +{ + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + }; + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, + attr, userspace_policy); + if (error) + return error; + + if (!a[OVS_USERSPACE_ATTR_PID] || + !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) + return -EINVAL; + + return 0; +} + +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) +{ + const struct nlattr *a; + int rem, err; + + if (depth >= SAMPLE_ACTION_DEPTH) + return -EOVERFLOW; + + nla_for_each_nested(a, attr, rem) { + /* Expected argument lengths, (u32)-1 for variable length. */ + static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { + [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), + [OVS_ACTION_ATTR_POP_VLAN] = 0, + [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 + }; + const struct ovs_action_push_vlan *vlan; + int type = nla_type(a); + bool skip_copy; + + if (type > OVS_ACTION_ATTR_MAX || + (action_lens[type] != nla_len(a) && + action_lens[type] != (u32)-1)) + return -EINVAL; + + skip_copy = false; + switch (type) { + case OVS_ACTION_ATTR_UNSPEC: + return -EINVAL; + + case OVS_ACTION_ATTR_USERSPACE: + err = validate_userspace(a); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_OUTPUT: + if (nla_get_u32(a) >= DP_MAX_PORTS) + return -EINVAL; + break; + + + case OVS_ACTION_ATTR_POP_VLAN: + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + vlan = nla_data(a); + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + return -EINVAL; + if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_SET: + err = validate_set(a, key, sfa, &skip_copy); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = validate_and_copy_sample(a, key, depth, sfa); + if (err) + return err; + skip_copy = true; + break; + + default: + return -EINVAL; + } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } + } + + if (rem > 0) + return -EINVAL; + + return 0; +} + +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, + sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; + case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), + nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + +int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h new file mode 100644 index 000000000000..440151045d39 --- /dev/null +++ b/net/openvswitch/flow_netlink.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + + +#ifndef FLOW_NETLINK_H +#define FLOW_NETLINK_H 1 + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/flex_array.h> + +#include <net/inet_ecn.h> +#include <net/ip_tunnels.h> + +#include "flow.h" + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, struct sw_flow_mask *mask); + +int ovs_nla_put_flow(const struct sw_flow_key *, + const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr); +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *, + const struct nlattr *); + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); +int ovs_nla_put_actions(const struct nlattr *attr, + int len, struct sk_buff *skb); + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len); +void ovs_nla_free_flow_actions(struct sw_flow_actions *); + +#endif /* flow_netlink.h */ diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c new file mode 100644 index 000000000000..e42542706087 --- /dev/null +++ b/net/openvswitch/flow_table.c @@ -0,0 +1,592 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include <linux/uaccess.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/sctp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/rculist.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ndisc.h> + +#include "datapath.h" + +#define TBL_MIN_BUCKETS 1024 +#define REHASH_INTERVAL (10 * 60 * HZ) + +static struct kmem_cache *flow_cache; + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask) +{ + const long *m = (long *)((u8 *)&mask->key + mask->range.start); + const long *s = (long *)((u8 *)src + mask->range.start); + long *d = (long *)((u8 *)dst + mask->range.start); + int i; + + /* The memory outside of the 'mask->range' are not set since + * further operations on 'dst' only uses contents within + * 'mask->range'. + */ + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + *d++ = *s++ & *m++; +} + +struct sw_flow *ovs_flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); + flow->sf_acts = NULL; + flow->mask = NULL; + + return flow; +} + +int ovs_flow_tbl_count(struct flow_table *table) +{ + return table->count; +} + +static struct flex_array *alloc_buckets(unsigned int n_buckets) +{ + struct flex_array *buckets; + int i, err; + + buckets = flex_array_alloc(sizeof(struct hlist_head), + n_buckets, GFP_KERNEL); + if (!buckets) + return NULL; + + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); + if (err) { + flex_array_free(buckets); + return NULL; + } + + for (i = 0; i < n_buckets; i++) + INIT_HLIST_HEAD((struct hlist_head *) + flex_array_get(buckets, i)); + + return buckets; +} + +static void flow_free(struct sw_flow *flow) +{ + kfree((struct sf_flow_acts __force *)flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + flow_free(flow); +} + +static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) +{ + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); + + kfree(mask); +} + +static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) +{ + if (!mask) + return; + + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + else + kfree(mask); + } +} + +void ovs_flow_free(struct sw_flow *flow, bool deferred) +{ + if (!flow) + return; + + flow_mask_del_ref(flow->mask, deferred); + + if (deferred) + call_rcu(&flow->rcu, rcu_free_flow_callback); + else + flow_free(flow); +} + +static void free_buckets(struct flex_array *buckets) +{ + flex_array_free(buckets); +} + +static void __table_instance_destroy(struct table_instance *ti) +{ + int i; + + if (ti->keep_flows) + goto skip_flows; + + for (i = 0; i < ti->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(ti->buckets, i); + struct hlist_node *n; + int ver = ti->node_ver; + + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { + hlist_del(&flow->hash_node[ver]); + ovs_flow_free(flow, false); + } + } + +skip_flows: + free_buckets(ti->buckets); + kfree(ti); +} + +static struct table_instance *table_instance_alloc(int new_size) +{ + struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); + + if (!ti) + return NULL; + + ti->buckets = alloc_buckets(new_size); + + if (!ti->buckets) { + kfree(ti); + return NULL; + } + ti->n_buckets = new_size; + ti->node_ver = 0; + ti->keep_flows = false; + get_random_bytes(&ti->hash_seed, sizeof(u32)); + + return ti; +} + +int ovs_flow_tbl_init(struct flow_table *table) +{ + struct table_instance *ti; + + ti = table_instance_alloc(TBL_MIN_BUCKETS); + + if (!ti) + return -ENOMEM; + + rcu_assign_pointer(table->ti, ti); + INIT_LIST_HEAD(&table->mask_list); + table->last_rehash = jiffies; + table->count = 0; + return 0; +} + +static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct table_instance *ti = container_of(rcu, struct table_instance, rcu); + + __table_instance_destroy(ti); +} + +static void table_instance_destroy(struct table_instance *ti, bool deferred) +{ + if (!ti) + return; + + if (deferred) + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + else + __table_instance_destroy(ti); +} + +void ovs_flow_tbl_destroy(struct flow_table *table) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + table_instance_destroy(ti, false); +} + +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, + u32 *bucket, u32 *last) +{ + struct sw_flow *flow; + struct hlist_head *head; + int ver; + int i; + + ver = ti->node_ver; + while (*bucket < ti->n_buckets) { + i = 0; + head = flex_array_get(ti->buckets, *bucket); + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + if (i < *last) { + i++; + continue; + } + *last = i + 1; + return flow; + } + (*bucket)++; + *last = 0; + } + + return NULL; +} + +static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) +{ + hash = jhash_1word(hash, ti->hash_seed); + return flex_array_get(ti->buckets, + (hash & (ti->n_buckets - 1))); +} + +static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->hash); + hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); +} + +static void flow_table_copy_flows(struct table_instance *old, + struct table_instance *new) +{ + int old_ver; + int i; + + old_ver = old->node_ver; + new->node_ver = !old_ver; + + /* Insert in new table. */ + for (i = 0; i < old->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head; + + head = flex_array_get(old->buckets, i); + + hlist_for_each_entry(flow, head, hash_node[old_ver]) + table_instance_insert(new, flow); + } + + old->keep_flows = true; +} + +static struct table_instance *table_instance_rehash(struct table_instance *ti, + int n_buckets) +{ + struct table_instance *new_ti; + + new_ti = table_instance_alloc(n_buckets); + if (!new_ti) + return NULL; + + flow_table_copy_flows(ti, new_ti); + + return new_ti; +} + +int ovs_flow_tbl_flush(struct flow_table *flow_table) +{ + struct table_instance *old_ti; + struct table_instance *new_ti; + + old_ti = ovsl_dereference(flow_table->ti); + new_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ti) + return -ENOMEM; + + rcu_assign_pointer(flow_table->ti, new_ti); + flow_table->last_rehash = jiffies; + flow_table->count = 0; + + table_instance_destroy(old_ti, true); + return 0; +} + +static u32 flow_hash(const struct sw_flow_key *key, int key_start, + int key_end) +{ + u32 *hash_key = (u32 *)((u8 *)key + key_start); + int hash_u32s = (key_end - key_start) >> 2; + + /* Make sure number of hash bytes are multiple of u32. */ + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + + return jhash2(hash_key, hash_u32s, 0); +} + +static int flow_key_start(const struct sw_flow_key *key) +{ + if (key->tun_key.ipv4_dst) + return 0; + else + return rounddown(offsetof(struct sw_flow_key, phy), + sizeof(long)); +} + +static bool cmp_key(const struct sw_flow_key *key1, + const struct sw_flow_key *key2, + int key_start, int key_end) +{ + const long *cp1 = (long *)((u8 *)key1 + key_start); + const long *cp2 = (long *)((u8 *)key2 + key_start); + long diffs = 0; + int i; + + for (i = key_start; i < key_end; i += sizeof(long)) + diffs |= *cp1++ ^ *cp2++; + + return diffs == 0; +} + +static bool flow_cmp_masked_key(const struct sw_flow *flow, + const struct sw_flow_key *key, + int key_start, int key_end) +{ + return cmp_key(&flow->key, key, key_start, key_end); +} + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match) +{ + struct sw_flow_key *key = match->key; + int key_start = flow_key_start(key); + int key_end = match->range.end; + + return cmp_key(&flow->unmasked_key, key, key_start, key_end); +} + +static struct sw_flow *masked_flow_lookup(struct table_instance *ti, + const struct sw_flow_key *unmasked, + struct sw_flow_mask *mask) +{ + struct sw_flow *flow; + struct hlist_head *head; + int key_start = mask->range.start; + int key_end = mask->range.end; + u32 hash; + struct sw_flow_key masked_key; + + ovs_flow_mask_key(&masked_key, unmasked, mask); + hash = flow_hash(&masked_key, key_start, key_end); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { + if (flow->mask == mask && flow->hash == hash && + flow_cmp_masked_key(flow, &masked_key, + key_start, key_end)) + return flow; + } + return NULL; +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key, + u32 *n_mask_hit) +{ + struct table_instance *ti = rcu_dereference(tbl->ti); + struct sw_flow_mask *mask; + struct sw_flow *flow; + + *n_mask_hit = 0; + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + (*n_mask_hit)++; + flow = masked_flow_lookup(ti, key, mask); + if (flow) /* Found */ + return flow; + } + return NULL; +} + +int ovs_flow_tbl_num_masks(const struct flow_table *table) +{ + struct sw_flow_mask *mask; + int num = 0; + + list_for_each_entry(mask, &table->mask_list, list) + num++; + + return num; +} + +static struct table_instance *table_instance_expand(struct table_instance *ti) +{ + return table_instance_rehash(ti, ti->n_buckets * 2); +} + +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + BUG_ON(table->count == 0); + hlist_del_rcu(&flow->hash_node[ti->node_ver]); + table->count--; +} + +static struct sw_flow_mask *mask_alloc(void) +{ + struct sw_flow_mask *mask; + + mask = kmalloc(sizeof(*mask), GFP_KERNEL); + if (mask) + mask->ref_count = 0; + + return mask; +} + +static void mask_add_ref(struct sw_flow_mask *mask) +{ + mask->ref_count++; +} + +static bool mask_equal(const struct sw_flow_mask *a, + const struct sw_flow_mask *b) +{ + u8 *a_ = (u8 *)&a->key + a->range.start; + u8 *b_ = (u8 *)&b->key + b->range.start; + + return (a->range.end == b->range.end) + && (a->range.start == b->range.start) + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); +} + +static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, + const struct sw_flow_mask *mask) +{ + struct list_head *ml; + + list_for_each(ml, &tbl->mask_list) { + struct sw_flow_mask *m; + m = container_of(ml, struct sw_flow_mask, list); + if (mask_equal(mask, m)) + return m; + } + + return NULL; +} + +/** + * add a new mask into the mask list. + * The caller needs to make sure that 'mask' is not the same + * as any masks that are already on the list. + */ +static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, + struct sw_flow_mask *new) +{ + struct sw_flow_mask *mask; + mask = flow_mask_find(tbl, new); + if (!mask) { + /* Allocate a new mask if none exsits. */ + mask = mask_alloc(); + if (!mask) + return -ENOMEM; + mask->key = new->key; + mask->range = new->range; + list_add_rcu(&mask->list, &tbl->mask_list); + } + + mask_add_ref(mask); + flow->mask = mask; + return 0; +} + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask) +{ + struct table_instance *new_ti = NULL; + struct table_instance *ti; + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + + flow->hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); + ti = ovsl_dereference(table->ti); + table_instance_insert(ti, flow); + table->count++; + + /* Expand table, if necessary, to make room. */ + if (table->count > ti->n_buckets) + new_ti = table_instance_expand(ti); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_ti = table_instance_rehash(ti, ti->n_buckets); + + if (new_ti) { + rcu_assign_pointer(table->ti, new_ti); + table_instance_destroy(ti, true); + table->last_rehash = jiffies; + } + return 0; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int ovs_flow_init(void) +{ + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); + + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void ovs_flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h new file mode 100644 index 000000000000..fbe45d5ad07d --- /dev/null +++ b/net/openvswitch/flow_table.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef FLOW_TABLE_H +#define FLOW_TABLE_H 1 + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/flex_array.h> + +#include <net/inet_ecn.h> +#include <net/ip_tunnels.h> + +#include "flow.h" + +struct table_instance { + struct flex_array *buckets; + unsigned int n_buckets; + struct rcu_head rcu; + int node_ver; + u32 hash_seed; + bool keep_flows; +}; + +struct flow_table { + struct table_instance __rcu *ti; + struct list_head mask_list; + unsigned long last_rehash; + unsigned int count; +}; + +int ovs_flow_init(void); +void ovs_flow_exit(void); + +struct sw_flow *ovs_flow_alloc(void); +void ovs_flow_free(struct sw_flow *, bool deferred); + +int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_count(struct flow_table *table); +void ovs_flow_tbl_destroy(struct flow_table *table); +int ovs_flow_tbl_flush(struct flow_table *flow_table); + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask); +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); +int ovs_flow_tbl_num_masks(const struct flow_table *table); +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, + u32 *bucket, u32 *idx); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *, + u32 *n_mask_hit); + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match); + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask); +#endif /* flow_table.h */ diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index c99dea543d64..a3d6951602db 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -24,8 +24,6 @@ #include <linux/if_tunnel.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <linux/if_vlan.h> -#include <linux/in.h> #include <linux/in_route.h> #include <linux/inetdevice.h> #include <linux/jhash.h> diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 98d3edbbc235..729c68763fe7 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -134,7 +134,7 @@ static void do_setup(struct net_device *netdev) netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; netdev->vlan_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 09d93c13cfd6..d21f77d875ba 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu) ovs_vport_free(vport_from_priv(netdev_vport)); } -static void netdev_destroy(struct vport *vport) +void ovs_netdev_detach_dev(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); - rtnl_lock(); + ASSERT_RTNL(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); - netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp)); + netdev_upper_dev_unlink(netdev_vport->dev, + netdev_master_upper_dev_get(netdev_vport->dev)); dev_set_promiscuity(netdev_vport->dev, -1); +} + +static void netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + rtnl_lock(); + if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH) + ovs_netdev_detach_dev(vport); rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index dd298b5c5cdb..8df01c1127e5 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); +void ovs_netdev_detach_dev(struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 56e22b74cf96..e797a50ac2be 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -29,7 +29,6 @@ #include <net/ip.h> #include <net/udp.h> #include <net/ip_tunnels.h> -#include <net/udp.h> #include <net/rtnetlink.h> #include <net/route.h> #include <net/dsfield.h> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a9dfdda9ed1d..fdc041c57853 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + f->time_next_packet = 0ULL; } return f; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f6334aa19151..7567e6f1a920 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 319137340d15..e650978daf27 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -390,7 +390,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) __u8 has_data = 0; struct dst_entry *dst = tp->dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ - __u32 cksum_buf_len = sizeof(struct sctphdr); pr_debug("%s: packet:%p\n", __func__, packet); @@ -493,7 +492,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (chunk == packet->auth) auth = skb_tail_pointer(nskb); - cksum_buf_len += chunk->skb->len; memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); @@ -538,12 +536,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!sctp_checksum_disable) { if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || (dst_xfrm(dst) != NULL) || packet->ipfragok) { - __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - - /* 3) Put the resultant value into the checksum field in the - * common header, and leave the rest of the bits unchanged. - */ - sh->checksum = sctp_end_cksum(crc32); + sh->checksum = sctp_compute_cksum(nskb, 0); } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 666c66842799..1a6eef39ab2f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } diff --git a/net/x25/Kconfig b/net/x25/Kconfig index c959312c45e3..e2fa133f9fba 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -16,8 +16,8 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at <http://www.sangoma.com/x25.htm> and - <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>. + You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and + <http://docwiki.cisco.com/wiki/X.25>. Information about X.25 for Linux is contained in the files <file:Documentation/networking/x25.txt> and <file:Documentation/networking/x25-iface.txt>. diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2906d520eea7..ccfdc7115a83 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + struct crypto_comp *tfm; + u8 *scratch; int err; local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); if (err) goto out; @@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); + local_bh_enable(); pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: - put_cpu(); + local_bh_enable(); return err; } @@ -220,8 +220,8 @@ static void ipcomp_free_scratches(void) static void * __percpu *ipcomp_alloc_scratches(void) { - int i; void * __percpu *scratches; + int i; if (ipcomp_scratch_users++) return ipcomp_scratches; @@ -233,7 +233,9 @@ static void * __percpu *ipcomp_alloc_scratches(void) ipcomp_scratches = scratches; for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); + void *scratch; + + scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i)); if (!scratch) return NULL; *per_cpu_ptr(scratches, i) = scratch; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 76e1873811d4..9a91f7431c41 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1844,6 +1844,13 @@ static int xdst_queue_output(struct sk_buff *skb) struct xfrm_dst *xdst = (struct xfrm_dst *) dst; struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + kfree_skb(skb); + return 0; + } if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 487ac6f37ca2..9a11f9f799f4 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -55,6 +55,7 @@ static struct sym_entry *table; static unsigned int table_size, table_cnt; static int all_symbols = 0; static char symbol_prefix_char = '\0'; +static unsigned long long kernel_start_addr = 0; int token_profit[0x10000]; @@ -65,7 +66,10 @@ unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] " + "[--symbol-prefix=<prefix char>] " + "[--page-offset=<CONFIG_PAGE_OFFSET>] " + "< in.map > out.S\n"); exit(1); } @@ -194,6 +198,9 @@ static int symbol_valid(struct sym_entry *s) int i; int offset = 1; + if (s->addr < kernel_start_addr) + return 0; + /* skip prefix char */ if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) offset++; @@ -646,6 +653,9 @@ int main(int argc, char **argv) if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) p++; symbol_prefix_char = *p; + } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { + const char *p = &argv[i][14]; + kernel_start_addr = strtoull(p, NULL, 16); } else usage(); } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 014994936b1c..32b10f53d0b4 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,6 +82,8 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi + kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" + local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 17f45e8aa89c..e1e9e0c999fe 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -49,6 +49,8 @@ static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device == device) return pcm; } @@ -60,6 +62,8 @@ static int snd_pcm_next(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device > device) return pcm->device; else if (pcm->card->number > card->number) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5b6c4e3c92ca..748c6a941963 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4864,8 +4864,8 @@ static void hda_power_work(struct work_struct *work) spin_unlock(&codec->power_lock); state = hda_call_codec_suspend(codec, true); - codec->pm_down_notified = 0; - if (!bus->power_keep_link_on && (state & AC_PWRST_CLK_STOP_OK)) { + if (!codec->pm_down_notified && + !bus->power_keep_link_on && (state & AC_PWRST_CLK_STOP_OK)) { codec->pm_down_notified = 1; hda_call_pm_notify(bus, false); } diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 26ad4f0aade3..b7c89dff7066 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4475,9 +4475,11 @@ int snd_hda_gen_build_controls(struct hda_codec *codec) true, &spec->vmaster_mute.sw_kctl); if (err < 0) return err; - if (spec->vmaster_mute.hook) + if (spec->vmaster_mute.hook) { snd_hda_add_vmaster_hook(codec, &spec->vmaster_mute, spec->vmaster_mute_enum); + snd_hda_sync_vmaster_hook(&spec->vmaster_mute); + } } free_kctls(spec); /* no longer needed */ diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 0cbdd87dde6d..2aa2f579b4d6 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -968,6 +968,15 @@ static void ad1884_fixup_hp_eapd(struct hda_codec *codec, } } +static void ad1884_fixup_thinkpad(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct ad198x_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->gen.keep_eapd_on = 1; +} + /* set magic COEFs for dmic */ static const struct hda_verb ad1884_dmic_init_verbs[] = { {0x01, AC_VERB_SET_COEF_INDEX, 0x13f7}, @@ -979,6 +988,7 @@ enum { AD1884_FIXUP_AMP_OVERRIDE, AD1884_FIXUP_HP_EAPD, AD1884_FIXUP_DMIC_COEF, + AD1884_FIXUP_THINKPAD, AD1884_FIXUP_HP_TOUCHSMART, }; @@ -997,6 +1007,12 @@ static const struct hda_fixup ad1884_fixups[] = { .type = HDA_FIXUP_VERBS, .v.verbs = ad1884_dmic_init_verbs, }, + [AD1884_FIXUP_THINKPAD] = { + .type = HDA_FIXUP_FUNC, + .v.func = ad1884_fixup_thinkpad, + .chained = true, + .chain_id = AD1884_FIXUP_DMIC_COEF, + }, [AD1884_FIXUP_HP_TOUCHSMART] = { .type = HDA_FIXUP_VERBS, .v.verbs = ad1884_dmic_init_verbs, @@ -1008,7 +1024,7 @@ static const struct hda_fixup ad1884_fixups[] = { static const struct snd_pci_quirk ad1884_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2a82, "HP Touchsmart", AD1884_FIXUP_HP_TOUCHSMART), SND_PCI_QUIRK_VENDOR(0x103c, "HP", AD1884_FIXUP_HP_EAPD), - SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1884_FIXUP_DMIC_COEF), + SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1884_FIXUP_THINKPAD), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf313bea7085..8ad554312b69 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4623,6 +4623,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), + SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_ASUS_MODE4), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 8b50e5958de5..01daf655e20b 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -530,6 +530,7 @@ static int hp_supply_event(struct snd_soc_dapm_widget *w, hubs->hp_startup_mode); break; } + break; case SND_SOC_DAPM_PRE_PMD: snd_soc_update_bits(codec, WM8993_CHARGE_PUMP_1, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c17c14c394df..b2949aed1ac2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1949,7 +1949,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, w->active ? "active" : "inactive"); list_for_each_entry(p, &w->sources, list_sink) { - if (p->connected && !p->connected(w, p->sink)) + if (p->connected && !p->connected(w, p->source)) continue; if (p->connect) @@ -3495,6 +3495,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->playback.stream_name); + return -ENOMEM; } w->priv = dai; @@ -3513,6 +3514,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->capture.stream_name); + return -ENOMEM; } w->priv = dai; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e297b74471b8..ca0d3d9f4bac 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -90,8 +90,20 @@ OPTIONS Number of mmap data pages. Must be a power of two. -g:: + Enables call-graph (stack chain/backtrace) recording. + --call-graph:: - Do call-graph (stack chain/backtrace) recording. + Setup and enable call-graph (stack chain/backtrace) recording, + implies -g. + + Allows specifying "fp" (frame pointer) or "dwarf" + (DWARF's CFI - Call Frame Information) as the method to collect + the information used to show the call graphs. + + In some systems, where binaries are build with gcc + --fomit-frame-pointer, using the "fp" method will produce bogus + call graphs, using "dwarf", if available (perf tools linked to + the libunwind library) should be used instead. -q:: --quiet:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 58d6598a9686..6a118e71d003 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -140,20 +140,12 @@ Default is to monitor all CPUS. --asm-raw:: Show raw instruction encoding of assembly instructions. --G [type,min,order]:: +-G:: + Enables call-graph (stack chain/backtrace) recording. + --call-graph:: - Display call chains using type, min percent threshold and order. - type can be either: - - flat: single column, linear exposure of call chains. - - graph: use a graph tree, displaying absolute overhead rates. - - fractal: like graph, but displays relative rates. Each branch of - the tree is considered as a new profiled object. - - order can be either: - - callee: callee based call graph. - - caller: inverted caller based call graph. - - Default: fractal,0.5,callee. + Setup and enable call-graph (stack chain/backtrace) recording, + implies -G. --ignore-callees=<regex>:: Ignore callees of the function(s) matching the given regex. diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 935d52216c89..fbc2888d6495 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -888,11 +888,18 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { err = perf_evlist__parse_sample(kvm->evlist, event, &sample); if (err) { + perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } err = perf_session_queue_event(kvm->session, event, &sample, 0); + /* + * FIXME: Here we can't consume the event, as perf_session_queue_event will + * point to it, and it'll get possibly overwritten by the kernel. + */ + perf_evlist__mmap_consume(kvm->evlist, idx); + if (err) { pr_err("Failed to enqueue sample: %d\n", err); return -1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a41ac41546c9..d04651484640 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -712,21 +712,12 @@ static int get_stack_size(char *str, unsigned long *_size) } #endif /* LIBUNWIND_SUPPORT */ -int record_parse_callchain_opt(const struct option *opt, - const char *arg, int unset) +int record_parse_callchain(const char *arg, struct perf_record_opts *opts) { - struct perf_record_opts *opts = opt->value; char *tok, *name, *saveptr = NULL; char *buf; int ret = -1; - /* --no-call-graph */ - if (unset) - return 0; - - /* We specified default option if none is provided. */ - BUG_ON(!arg); - /* We need buffer that we know we can write to. */ buf = malloc(strlen(arg) + 1); if (!buf) @@ -764,13 +755,9 @@ int record_parse_callchain_opt(const struct option *opt, ret = get_stack_size(tok, &size); opts->stack_dump_size = size; } - - if (!ret) - pr_debug("callchain: stack dump size %d\n", - opts->stack_dump_size); #endif /* LIBUNWIND_SUPPORT */ } else { - pr_err("callchain: Unknown -g option " + pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); break; } @@ -778,13 +765,52 @@ int record_parse_callchain_opt(const struct option *opt, } while (0); free(buf); + return ret; +} + +static void callchain_debug(struct perf_record_opts *opts) +{ + pr_debug("callchain: type %d\n", opts->call_graph); + if (opts->call_graph == CALLCHAIN_DWARF) + pr_debug("callchain: stack dump size %d\n", + opts->stack_dump_size); +} + +int record_parse_callchain_opt(const struct option *opt, + const char *arg, + int unset) +{ + struct perf_record_opts *opts = opt->value; + int ret; + + /* --no-call-graph */ + if (unset) { + opts->call_graph = CALLCHAIN_NONE; + pr_debug("callchain: disabled\n"); + return 0; + } + + ret = record_parse_callchain(arg, opts); if (!ret) - pr_debug("callchain: type %d\n", opts->call_graph); + callchain_debug(opts); return ret; } +int record_callchain_opt(const struct option *opt, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + struct perf_record_opts *opts = opt->value; + + if (opts->call_graph == CALLCHAIN_NONE) + opts->call_graph = CALLCHAIN_FP; + + callchain_debug(opts); + return 0; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -813,12 +839,12 @@ static struct perf_record record = { }, }; -#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " +#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef LIBUNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "[fp]"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp"; #endif /* @@ -858,9 +884,12 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts, - "mode[,dump_size]", record_callchain_help, - &record_parse_callchain_opt, "fp"), + OPT_CALLBACK_NOOPT('g', NULL, &record.opts, + NULL, "enables call-graph recording" , + &record_callchain_opt), + OPT_CALLBACK(0, "call-graph", &record.opts, + "mode[,dump_size]", record_callchain_help, + &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 212214162bb2..5a11f13e56f9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -810,7 +810,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - continue; + goto next_event; } evsel = perf_evlist__id2evsel(session->evlist, sample.id); @@ -825,13 +825,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_KERNEL: ++top->kernel_samples; if (top->hide_kernel_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_GUEST_KERNEL: @@ -847,7 +847,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) */ /* Fall thru */ default: - continue; + goto next_event; } @@ -859,6 +859,8 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) machine__process_event(machine, event); } else ++session->stats.nr_unknown_events; +next_event: + perf_evlist__mmap_consume(top->evlist, idx); } } @@ -1016,16 +1018,16 @@ out_delete: } static int -parse_callchain_opt(const struct option *opt, const char *arg, int unset) +callchain_opt(const struct option *opt, const char *arg, int unset) { - /* - * --no-call-graph - */ - if (unset) - return 0; - symbol_conf.use_callchain = true; + return record_callchain_opt(opt, arg, unset); +} +static int +parse_callchain_opt(const struct option *opt, const char *arg, int unset) +{ + symbol_conf.use_callchain = true; return record_parse_callchain_opt(opt, arg, unset); } @@ -1106,9 +1108,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, - "mode[,dump_size]", record_callchain_help, - &parse_callchain_opt, "fp"), + OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts, + NULL, "enables call-graph recording", + &callchain_opt), + OPT_CALLBACK(0, "call-graph", &top.record_opts, + "mode[,dump_size]", record_callchain_help, + &parse_callchain_opt), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71aa3e35406b..99c8d9ad6729 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -987,7 +987,7 @@ again: err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); - continue; + goto next_event; } if (trace->base_time == 0) @@ -1001,18 +1001,20 @@ again: evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - continue; + goto next_event; } if (sample.raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", perf_evsel__name(evsel), sample.tid, sample.cpu, sample.raw_size); - continue; + goto next_event; } handler = evsel->handler.func; handler(trace, evsel, &sample); +next_event: + perf_evlist__mmap_consume(evlist, i); if (done) goto out_unmap_evlist; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6fb781d5586c..e3fedfa2906e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -290,6 +290,7 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist, for (i = 0; i < evlist->nr_mmaps; i++) { while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { ret = process_event(machine, evlist, event, state); + perf_evlist__mmap_consume(evlist, i); if (ret < 0) return ret; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index d444ea2c47d9..376c35608534 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -36,6 +36,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm) (pid_t)event->comm.tid == getpid() && strcmp(event->comm.comm, comm) == 0) found += 1; + perf_evlist__mmap_consume(evlist, i); } } return found; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index c4185b9aeb80..a7232c204eb9 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -122,6 +122,7 @@ int test__basic_mmap(void) goto out_munmap; } nr_events[evsel->idx]++; + perf_evlist__mmap_consume(evlist, 0); } err = 0; diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c index fc5b9fca8b47..524b221b829b 100644 --- a/tools/perf/tests/open-syscall-tp-fields.c +++ b/tools/perf/tests/open-syscall-tp-fields.c @@ -77,8 +77,10 @@ int test__syscall_open_tp_fields(void) ++nr_events; - if (type != PERF_RECORD_SAMPLE) + if (type != PERF_RECORD_SAMPLE) { + perf_evlist__mmap_consume(evlist, i); continue; + } err = perf_evsel__parse_sample(evsel, event, &sample); if (err) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index b8a7056519ac..7923b06ffc91 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -263,6 +263,8 @@ int test__PERF_RECORD(void) type); ++errs; } + + perf_evlist__mmap_consume(evlist, i); } } diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 0ab61b1f408e..4ca1b938f6a6 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -122,7 +122,7 @@ int test__perf_time_to_tsc(void) if (event->header.type != PERF_RECORD_COMM || (pid_t)event->comm.pid != getpid() || (pid_t)event->comm.tid != getpid()) - continue; + goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { CHECK__(perf_evsel__parse_sample(evsel, event, @@ -134,6 +134,8 @@ int test__perf_time_to_tsc(void) &sample)); comm2_time = sample.time; } +next_event: + perf_evlist__mmap_consume(evlist, i); } } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 2e41e2d32ccc..6e2b44ec0749 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) - continue; + goto next_event; err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { @@ -88,6 +88,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) total_periods += sample.period; nr_samples++; +next_event: + perf_evlist__mmap_consume(evlist, 0); } if ((u64) nr_samples == total_periods) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 28fe5894b061..a3e64876e940 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -96,10 +96,10 @@ int test__task_exit(void) retry: while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) { - if (event->header.type != PERF_RECORD_EXIT) - continue; + if (event->header.type == PERF_RECORD_EXIT) + nr_exit++; - nr_exit++; + perf_evlist__mmap_consume(evlist, 0); } if (!exited || !nr_exit) { diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 194e2f42ff5d..6c152686e837 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -315,8 +315,7 @@ static inline void advance_hpp(struct perf_hpp *hpp, int inc) } static int hist_entry__period_snprintf(struct perf_hpp *hpp, - struct hist_entry *he, - bool color) + struct hist_entry *he) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; @@ -338,7 +337,7 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp, } else first = false; - if (color && fmt->color) + if (perf_hpp__use_color() && fmt->color) ret = fmt->color(fmt, hpp, he); else ret = fmt->entry(fmt, hpp, he); @@ -358,12 +357,11 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; - bool color = !symbol_conf.field_sep; if (size == 0 || size > bfsz) size = hpp.size = bfsz; - ret = hist_entry__period_snprintf(&hpp, he, color); + ret = hist_entry__period_snprintf(&hpp, he); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); ret = fprintf(fp, "%s\n", bf); @@ -482,6 +480,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, print_entries: linesz = hists__sort_list_width(hists) + 3 + 1; + linesz += perf_hpp__color_overhead(); line = malloc(linesz); if (line == NULL) { ret = -1; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 2b585bc308cf..9e99060408ae 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -147,6 +147,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor) struct option; +int record_parse_callchain(const char *arg, struct perf_record_opts *opts); int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset); +int record_callchain_opt(const struct option *opt, const char *arg, int unset); + extern const char record_callchain_help[]; #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9b393e7dca6f..49096ea58a15 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -187,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP2; + event->header.type = PERF_RECORD_MMAP; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -198,7 +198,6 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; - unsigned int ino; size_t size; ssize_t n; @@ -209,15 +208,12 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - event->mmap2.ino = (u64)ino; + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", + &event->mmap.start, &event->mmap.len, prot, + &event->mmap.pgoff, + execname); - if (n != 8) + if (n != 5) continue; if (prot[2] != 'x') @@ -227,15 +223,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); + memcpy(event->mmap.filename, execname, size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap2.len -= event->mmap.start; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.pid = tgid; - event->mmap2.tid = pid; + event->mmap.len -= event->mmap.start; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.pid = tgid; + event->mmap.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f9f77bee0b1b..e584cd30b0f2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -545,12 +545,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) md->prev = old; - if (!evlist->overwrite) - perf_mmap__write_tail(md, old); - return event; } +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +{ + if (!evlist->overwrite) { + struct perf_mmap *md = &evlist->mmap[idx]; + unsigned int old = md->prev; + + perf_mmap__write_tail(md, old); + } +} + static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) { if (evlist->mmap[idx].base != NULL) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 880d7139d2fb..206d09339306 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -89,6 +89,8 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); + int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0ce9febf1ba0..9f1ef9bee2d0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -678,7 +678,6 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->sample_type |= PERF_SAMPLE_WEIGHT; attr->mmap = track; - attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; /* diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1329b6b6ffe6..ce8dc61ce2c3 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -5,6 +5,7 @@ #include <pthread.h> #include "callchain.h" #include "header.h" +#include "color.h" extern struct callchain_param callchain_param; @@ -175,6 +176,18 @@ void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +static inline size_t perf_hpp__use_color(void) +{ + return !symbol_conf.field_sep; +} + +static inline size_t perf_hpp__color_overhead(void) +{ + return perf_hpp__use_color() ? + (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX + : 0; +} + struct perf_evlist; struct hist_browser_timer { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c09e0a9fdf4c..f0692737ebf1 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1357,10 +1357,10 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, goto post; } + fname = dwarf_decl_file(&spdie); if (addr == (unsigned long)baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; - fname = dwarf_decl_file(&spdie); goto post; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 71b5412bbbb9..2ac4bc92bb1f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -822,6 +822,8 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; + perf_evlist__mmap_consume(evlist, cpu); + if (pyevent == NULL) return PyErr_NoMemory(); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a85e4ae5f3ac..c0c9795c4f02 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %" PRIu64, evsel->attr.config); + die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); pid = raw_field_value(event, "common_pid", data); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index cc75a3cef388..95d91a0b23af 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -56,6 +56,17 @@ static void handler_call_die(const char *handler_name) Py_FatalError("problem in Python trace event handler"); } +/* + * Insert val into into the dictionary and decrement the reference counter. + * This is necessary for dictionaries since PyDict_SetItemString() does not + * steal a reference, as opposed to PyTuple_SetItem(). + */ +static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) +{ + PyDict_SetItemString(dict, key, val); + Py_DECREF(val); +} + static void define_value(enum print_arg_type field_type, const char *ev_name, const char *field_name, @@ -279,11 +290,11 @@ static void python_process_tracepoint(union perf_event *perf_event PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); PyTuple_SetItem(t, n++, PyString_FromString(comm)); } else { - PyDict_SetItemString(dict, "common_cpu", PyInt_FromLong(cpu)); - PyDict_SetItemString(dict, "common_s", PyInt_FromLong(s)); - PyDict_SetItemString(dict, "common_ns", PyInt_FromLong(ns)); - PyDict_SetItemString(dict, "common_pid", PyInt_FromLong(pid)); - PyDict_SetItemString(dict, "common_comm", PyString_FromString(comm)); + pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu)); + pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s)); + pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns)); + pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid)); + pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm)); } for (field = event->format.fields; field; field = field->next) { if (field->flags & FIELD_IS_STRING) { @@ -313,7 +324,7 @@ static void python_process_tracepoint(union perf_event *perf_event if (handler) PyTuple_SetItem(t, n++, obj); else - PyDict_SetItemString(dict, field->name, obj); + pydict_set_item_string_decref(dict, field->name, obj); } if (!handler) @@ -370,21 +381,21 @@ static void python_process_general_event(union perf_event *perf_event if (!handler || !PyCallable_Check(handler)) goto exit; - PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); - PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "sample", PyString_FromStringAndSize( (const char *)sample, sizeof(*sample))); - PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); - PyDict_SetItemString(dict, "comm", + pydict_set_item_string_decref(dict, "comm", PyString_FromString(thread->comm)); if (al->map) { - PyDict_SetItemString(dict, "dso", + pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); } if (al->sym) { - PyDict_SetItemString(dict, "symbol", + pydict_set_item_string_decref(dict, "symbol", PyString_FromString(al->sym->name)); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a9dd682cf5e3..1cf9ccb01013 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3091,7 +3091,7 @@ static const struct file_operations *stat_fops[] = { static int kvm_init_debug(void) { - int r = -EFAULT; + int r = -EEXIST; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |