From 3f666c56c6b8cc40a5e9002aac484b8f5b83c402 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 3 Jan 2020 13:33:07 -0500 Subject: dax: Pass dax_dev instead of bdev to dax_writeback_mapping_range() As of now dax_writeback_mapping_range() takes "struct block_device" as a parameter and dax_dev is searched from bdev name. This also involves taking a fresh reference on dax_dev and putting that reference at the end of function. We are developing a new filesystem virtio-fs and using dax to access host page cache directly. But there is no block device. IOW, we want to make use of dax but want to get rid of this assumption that there is always a block device associated with dax_dev. So pass in "struct dax_device" as parameter instead of bdev. ext2/ext4/xfs are current users and they already have a reference on dax_device. So there is no need to take reference and drop reference to dax_device on each call of this function. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20200103183307.GB13350@redhat.com Signed-off-by: Dan Williams --- include/linux/dax.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 9bd8528bd305..d5932e47c597 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -141,7 +141,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc); + struct dax_device *dax_dev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); dax_entry_t dax_lock_page(struct page *page); @@ -180,7 +180,7 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping) } static inline int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc) + struct dax_device *dax_dev, struct writeback_control *wbc) { return -EOPNOTSUPP; } -- cgit v1.2.3 From f01b16a85bfae2e6b4f32de0a1f37ac4050dc316 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 6 Jan 2020 13:11:17 -0500 Subject: dax: Get rid of fs_dax_get_by_host() helper Looks like nobody is using fs_dax_get_by_host() except fs_dax_get_by_bdev() and it can easily use dax_get_by_host() instead. IIUC, fs_dax_get_by_host() was only introduced so that one could compile with CONFIG_FS_DAX=n and CONFIG_DAX=m. fs_dax_get_by_bdev() achieves the same purpose and hence it looks like fs_dax_get_by_host() is not needed anymore. Signed-off-by: Vivek Goyal Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200106181117.GA16248@redhat.com Signed-off-by: Dan Williams --- include/linux/dax.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index d5932e47c597..328c2dbb4409 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -129,11 +129,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, sectors); } -static inline struct dax_device *fs_dax_get_by_host(const char *host) -{ - return dax_get_by_host(host); -} - static inline void fs_put_dax(struct dax_device *dax_dev) { put_dax(dax_dev); @@ -160,11 +155,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } -static inline struct dax_device *fs_dax_get_by_host(const char *host) -{ - return NULL; -} - static inline void fs_put_dax(struct dax_device *dax_dev) { } -- cgit v1.2.3 From ebc0f83c78a2d26384401ecf2d2fa48063c0ee27 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sat, 11 Jan 2020 04:53:39 -0500 Subject: timers/nohz: Update NOHZ load in remote tick The way loadavg is tracked during nohz only pays attention to the load upon entering nohz. This can be particularly noticeable if full nohz is entered while non-idle, and then the cpu goes idle and stays that way for a long time. Use the remote tick to ensure that full nohz cpus report their deltas within a reasonable time. [ swood: Added changelog and removed recheck of stopped tick. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Scott Wood Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/1578736419-14628-3-git-send-email-swood@redhat.com --- include/linux/sched/nohz.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 1abe91ff6e4a..6d67e9a5af6b 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { } #ifdef CONFIG_NO_HZ_COMMON void calc_load_nohz_start(void); +void calc_load_nohz_remote(struct rq *rq); void calc_load_nohz_stop(void); #else static inline void calc_load_nohz_start(void) { } +static inline void calc_load_nohz_remote(struct rq *rq) { } static inline void calc_load_nohz_stop(void) { } #endif /* CONFIG_NO_HZ_COMMON */ -- cgit v1.2.3 From 183edb20e60a73925bf3b60e2f4796898167262f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 3 Feb 2020 15:45:17 +0000 Subject: cpufreq: Make cpufreq_global_kobject static The cpufreq_global_kobject is only used internally by cpufreq.c after commit 2361be236662 ("cpufreq: Don't create empty /sys/devices/system/cpu/cpufreq directory"). Make it static. Signed-off-by: Yangtao Li [ rjw: Add empty line after cpufreq_global_kobject definition ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 018dce868de6..0fb561d1b524 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -201,9 +201,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) return cpumask_weight(policy->cpus) > 1; } -/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ -extern struct kobject *cpufreq_global_kobject; - #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); -- cgit v1.2.3 From 4a47cbae04844f0c5e2365aa6c217b61850bb832 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Feb 2020 14:44:38 +0100 Subject: dma-direct: improve swiotlb error reporting Untangle the way how dma_direct_map_page calls into swiotlb to be able to properly report errors where the swiotlb DMA address overflows the mask separately from overflows in the !swiotlb case. This means that siotlb_map now has to do a little more work that duplicates dma_direct_map_page, but doing so greatly simplifies the calling convention. Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cde3dc18e21a..046bb94bd4d6 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -64,6 +64,9 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, size_t size, enum dma_data_direction dir, enum dma_sync_target target); +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); + #ifdef CONFIG_SWIOTLB extern enum swiotlb_force swiotlb_force; extern phys_addr_t io_tlb_start, io_tlb_end; @@ -73,8 +76,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr) return paddr >= io_tlb_start && paddr < io_tlb_end; } -bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs); void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); @@ -85,12 +86,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr) { return false; } -static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys, - dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return false; -} static inline void swiotlb_exit(void) { } -- cgit v1.2.3 From f2b18baca9539c6a3116d48b70972c7a2ba5d766 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Jan 2020 12:25:50 +0100 Subject: mac80211: use more bits for ack_frame_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that this wasn't a good idea, I hit a test failure in hwsim due to this. That particular failure was easily worked around, but it raised questions: if an AP needs to, for example, send action frames to each connected station, the current limit is nowhere near enough (especially if those stations are sleeping and the frames are queued for a while.) Shuffle around some bits to make more room for ack_frame_id to allow up to 8192 queued up frames, that's enough for queueing 4 frames to each connected station, even at the maximum of 2007 stations on a single AP. We take the bits from band (which currently only 2 but I leave 3 in case we add another band) and from the hw_queue, which can only need 4 since it has a limit of 16 queues. Fixes: 6912daed05e1 ("mac80211: Shrink the size of ack_frame_id to make room for tx_time_est") Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200115122549.b9a4ef9f4980.Ied52ed90150220b83a280009c590b65d125d087c@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index aa145808e57a..77e6b5a83b06 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1004,12 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) struct ieee80211_tx_info { /* common information */ u32 flags; - u8 band; - - u8 hw_queue; - - u16 ack_frame_id:6; - u16 tx_time_est:10; + u32 band:3, + ack_frame_id:13, + hw_queue:4, + tx_time_est:10; + /* 2 free bits */ union { struct { -- cgit v1.2.3 From e7598fac323aad0e502415edeffd567315994dd6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 10 Feb 2020 10:36:56 +0100 Subject: iommu/vt-d: Fix compile warning from intel-svm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_svm_is_pasid_valid() needs to be marked inline, otherwise it causes the compile warning below: CC [M] drivers/dma/idxd/cdev.o In file included from drivers/dma/idxd/cdev.c:9:0: ./include/linux/intel-svm.h:125:12: warning: ‘intel_svm_is_pasid_valid’ defined but not used [-Wunused-function] static int intel_svm_is_pasid_valid(struct device *dev, int pasid) ^~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Borislav Petkov Fixes: 15060aba71711 ('iommu/vt-d: Helper function to query if a pasid has any active users') Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 94f047a8a845..d7c403d0dd27 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -122,7 +122,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } -- cgit v1.2.3 From ca4b43c14cd88d28cfc6467d2fa075aad6818f1d Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Sat, 1 Feb 2020 14:13:44 +0800 Subject: usb: charger: assign specific number for enum value To work properly on every architectures and compilers, the enum value needs to be specific numbers. Suggested-by: Greg KH Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/1580537624-10179-1-git-send-email-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/charger.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ -- cgit v1.2.3 From 73f8bda9b5dc1c69df2bc55c0cbb24461a6391a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Feb 2020 16:38:28 +0100 Subject: USB: core: add endpoint-blacklist quirk Add a new device quirk that can be used to blacklist endpoints. Since commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") USB core ignores any duplicate endpoints found during descriptor parsing. In order to handle devices where the first interfaces with duplicate endpoints are the ones that should have their endpoints ignored, we need to add a blacklist. Tested-by: edes Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200203153830.26394-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index a1be64c9940f..22c1f579afe3 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -69,4 +69,7 @@ /* Hub needs extra delay after resetting its port. */ #define USB_QUIRK_HUB_SLOW_RESET BIT(14) +/* device has blacklisted endpoints */ +#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 0c5aae59270fb1f827acce182786094c9ccf598e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 10 Feb 2020 15:57:30 +0100 Subject: serdev: ttyport: restore client ops on deregistration The serdev tty-port controller driver should reset the tty-port client operations also on deregistration to avoid a NULL-pointer dereference in case the port is later re-registered as a normal tty device. Note that this can only happen with tty drivers such as 8250 which have statically allocated port structures that can end up being reused and where a later registration would not register a serdev controller (e.g. due to registration errors or if the devicetree has been changed in between). Specifically, this can be an issue for any statically defined ports that would be registered by 8250 core when an 8250 driver is being unbound. Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Cc: stable # 4.11 Reported-by: Loic Poulain Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200210145730.22762-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index bfa4e2ee94a9..bd5fe0e907e8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -225,6 +225,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ -- cgit v1.2.3 From 7276531d4036f5db2af15c8b6caa02e7741f5d80 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 10 Feb 2020 17:06:50 -0600 Subject: tracing: Consolidate trace() functions Move the checking, buffer reserve and buffer commit code in synth_event_trace_start/end() into inline functions __synth_event_trace_start/end() so they can also be used by synth_event_trace() and synth_event_trace_array(), and then have all those functions use them. Also, change synth_event_trace_state.enabled to disabled so it only needs to be set if the event is disabled, which is not normally the case. Link: http://lkml.kernel.org/r/b1f3108d0f450e58192955a300e31d0405ab4149.1581374549.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 67f528ecb9e5..21098298b49b 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -424,7 +424,7 @@ struct synth_event_trace_state { struct synth_event *event; unsigned int cur_field; unsigned int n_u64; - bool enabled; + bool disabled; bool add_next; bool add_name; }; -- cgit v1.2.3 From e3728b50cd9be7d4b1469447cdf1feb93e3b7adb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Feb 2020 10:11:02 +0100 Subject: ACPI: PM: s2idle: Avoid possible race related to the EC GPE It is theoretically possible for the ACPI EC GPE to be set after the s2idle_ops->wake() called from s2idle_loop() has returned and before the subsequent pm_wakeup_pending() check is carried out. If that happens, the resulting wakeup event will cause the system to resume even though it may be a spurious one. To avoid that race, first make the ->wake() callback in struct platform_s2idle_ops return a bool value indicating whether or not to let the system resume and rearrange s2idle_loop() to use that value instad of the direct pm_wakeup_pending() call if ->wake() is present. Next, rework acpi_s2idle_wake() to process EC events and check pm_wakeup_pending() before re-arming the SCI for system wakeup to prevent it from triggering prematurely and add comments to that function to explain the rationale for the new code flow. Fixes: 56b991849009 ("PM: sleep: Simplify suspend-to-idle control flow") Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4a230c2f1c31..2b2055b035ee 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -191,7 +191,7 @@ struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); int (*prepare_late)(void); - void (*wake)(void); + bool (*wake)(void); void (*restore_early)(void); void (*restore)(void); void (*end)(void); -- cgit v1.2.3 From ea128834dd76f9a72a35d011c651fa96658f06a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Feb 2020 17:52:32 +0100 Subject: ACPICA: Introduce acpi_any_gpe_status_set() Introduce a new helper function, acpi_any_gpe_status_set(), for checking the status bits of all enabled GPEs in one go. It is needed to distinguish spurious SCIs from genuine ones when deciding whether or not to wake up the system from suspend-to-idle. Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 00994b1b8681..5867777bb7d0 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -752,6 +752,7 @@ ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u3 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_get_gpe_device(u32 gpe_index, -- cgit v1.2.3 From 84a4062632462c4320704fcdf8e99e89e94c0aba Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:36 +0100 Subject: HID: core: increase HID report buffer size to 8KiB We have a HID touch device that reports its opens and shorts test results in HID buffers of size 8184 bytes. The maximum size of the HID buffer is currently set to 4096 bytes, causing probe of this device to fail. With this patch we increase the maximum size of the HID buffer to 8192 bytes, making device probe and acquisition of said buffers succeed. Signed-off-by: Johan Korsnes Cc: Alan Stern Cc: Armando Visconti Cc: Jiri Kosina Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index cd41f209043f..875f71132b14 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -492,7 +492,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 -- cgit v1.2.3 From efeda80da38d0b4afd77a12bd4a44f657567d26c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Feb 2020 09:01:54 -0500 Subject: NFSv4: Fix revalidation of dentries with delegations If a dentry was not initially looked up while we were holding a delegation, then we do still need to revalidate that it still holds the same name. If there are multiple hard links to the same file, then all the hard links need validation. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington [Anna: Put nfs_unset_verifier_delegated() under CONFIG_NFS_V4] Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a5f8f03ecd59..5d5b91e54f73 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -337,35 +337,17 @@ static inline int nfs_server_capable(struct inode *inode, int cap) return NFS_SERVER(inode)->caps & cap; } -static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) -{ - dentry->d_time = verf; -} - /** * nfs_save_change_attribute - Returns the inode attribute change cookie * @dir - pointer to parent directory inode - * The "change attribute" is updated every time we finish an operation - * that will result in a metadata change on the server. + * The "cache change attribute" is updated when we need to revalidate + * our dentry cache after a directory was seen to change on the server. */ static inline unsigned long nfs_save_change_attribute(struct inode *dir) { return NFS_I(dir)->cache_change_attribute; } -/** - * nfs_verify_change_attribute - Detects NFS remote directory changes - * @dir - pointer to parent directory inode - * @chattr - previously saved change attribute - * Return "false" if the verifiers doesn't match the change attribute. - * This would usually indicate that the directory contents have changed on - * the server, and that any dentries need revalidating. - */ -static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr) -{ - return chattr == NFS_I(dir)->cache_change_attribute; -} - /* * linux/fs/nfs/inode.c */ @@ -495,6 +477,10 @@ extern const struct file_operations nfs_dir_operations; extern const struct dentry_operations nfs_dentry_operations; extern void nfs_force_lookup_revalidate(struct inode *dir); +extern void nfs_set_verifier(struct dentry * dentry, unsigned long verf); +#if IS_ENABLED(CONFIG_NFS_V4) +extern void nfs_clear_verifier_delegated(struct inode *inode); +#endif /* IS_ENABLED(CONFIG_NFS_V4) */ extern struct dentry *nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); -- cgit v1.2.3 From 0bf999f9c5e74c7ecf9dafb527146601e5c848b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Feb 2020 19:36:14 -0800 Subject: linux/pipe_fs_i.h: fix kernel-doc warnings after @wait was split Fix kernel-doc warnings in struct pipe_inode_info after @wait was split into @rd_wait and @wr_wait. include/linux/pipe_fs_i.h:66: warning: Function parameter or member 'rd_wait' not described in 'pipe_inode_info' include/linux/pipe_fs_i.h:66: warning: Function parameter or member 'wr_wait' not described in 'pipe_inode_info' Fixes: 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing") Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d5765039652a..ae58fad7f1e0 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -29,7 +29,8 @@ struct pipe_buffer { /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing - * @wait: reader/writer wait point in case of empty/full pipe + * @rd_wait: reader wait point in case of empty pipe + * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption * @max_usage: The maximum number of slots that may be used in the ring -- cgit v1.2.3 From 6ee2deb6fbed6ed343040215d10f3c73d00304df Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 11 Feb 2020 18:31:54 +0800 Subject: net/flow_dissector: remove unexist field description @thoff has moved to struct flow_dissector_key_control. Fixes: 42aecaa9bb2b ("net: Get skb hash over flow_keys structure") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d93017a7ce5c..e9391e877f9a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -33,7 +33,6 @@ enum flow_dissect_ret { /** * struct flow_dissector_key_basic: - * @thoff: Transport header offset * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ -- cgit v1.2.3 From 0b41713b606694257b90d61ba7e2712d8457648b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Feb 2020 20:47:05 +0100 Subject: icmp: introduce helper for nat'd source address in network device context This introduces a helper function to be called only by network drivers that wraps calls to icmp[v6]_send in a conntrack transformation, in case NAT has been used. We don't want to pollute the non-driver path, though, so we introduce this as a helper to be called by places that actually make use of this, as suggested by Florian. Signed-off-by: Jason A. Donenfeld Cc: Florian Westphal Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 6 ++++++ include/net/icmp.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index ef1cbb5f454f..93338fd54af8 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -31,6 +31,12 @@ static inline void icmpv6_send(struct sk_buff *skb, } #endif +#if IS_ENABLED(CONFIG_NF_NAT) +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); +#else +#define icmpv6_ndo_send icmpv6_send +#endif + extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); diff --git a/include/net/icmp.h b/include/net/icmp.h index 5d4bfdba9adf..9ac2d2672a93 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } +#if IS_ENABLED(CONFIG_NF_NAT) +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); +#else +#define icmp_ndo_send icmp_send +#endif + int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); -- cgit v1.2.3 From 1dade3a7048ccfc675650cd2cf13d578b095e5fb Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:39 +0300 Subject: ACPICA: Introduce ACPI_ACCESS_BYTE_WIDTH() macro Sometimes it is useful to find the access_width field value in bytes and not in bits so add a helper that can be used for this purpose. Suggested-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index a2583c2bc054..4defed58ea33 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -532,11 +532,12 @@ typedef u64 acpi_integer; strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * -- cgit v1.2.3 From dfa9a5efe8b932a84b3b319250aa3ac60c20f876 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:16 +0100 Subject: ALSA: rawmidi: Avoid bit fields for state flags The rawmidi state flags (opened, append, active_sensing) are stored in bit fields that can be potentially racy when concurrently accessed without any locks. Although the current code should be fine, there is also no any real benefit by keeping the bitfields for this kind of short number of members. This patch changes those bit fields flags to the simple bool fields. There should be no size increase of the snd_rawmidi_substream by this change. Reported-by: syzbot+576cc007eb9f2c968200@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 40ab20439fee..a36b7227a15a 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -77,9 +77,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; -- cgit v1.2.3 From a1fa83bdab784fa0ff2e92870011c0dcdbd2f680 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 12 Feb 2020 22:28:20 -0800 Subject: netdevice.h: fix all kernel-doc and Sphinx warnings Eliminate all kernel-doc and Sphinx warnings in . Fixes these warnings: ../include/linux/netdevice.h:2100: warning: Function parameter or member 'gso_partial_features' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'l3mdev_ops' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'xfrmdev_ops' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'tlsdev_ops' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'name_assign_type' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'ieee802154_ptr' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'mpls_ptr' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'xdp_prog' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'gro_flush_timeout' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'xdp_bulkq' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'xps_cpus_map' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'xps_rxqs_map' not described in 'net_device' ../include/linux/netdevice.h:2100: warning: Function parameter or member 'qdisc_hash' not described in 'net_device' ../include/linux/netdevice.h:3552: WARNING: Inline emphasis start-string without end-string. ../include/linux/netdevice.h:3552: WARNING: Inline emphasis start-string without end-string. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9c6b5c61d27..9f1f633235f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1616,6 +1616,7 @@ enum netdev_priv_flags { * and drivers will need to set them appropriately. * * @mpls_features: Mask of features inheritable by MPLS + * @gso_partial_features: value(s) from NETIF_F_GSO\* * * @ifindex: interface index * @group: The group the device belongs to @@ -1640,8 +1641,11 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations + * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour * discovery handling. Necessary for e.g. 6LoWPAN. + * @xfrmdev_ops: Transformation offload operations + * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1680,6 +1684,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one + * @name_assign_type: network interface name assignment type * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that @@ -1702,6 +1707,9 @@ enum netdev_priv_flags { * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering + * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network + * device struct + * @mpls_ptr: mpls_dev struct pointer * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -1710,6 +1718,8 @@ enum netdev_priv_flags { * @num_rx_queues: Number of RX queues * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device + * @xdp_prog: XDP sockets filter program pointer + * @gro_flush_timeout: timeout for GRO layer in NAPI * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -1731,10 +1741,14 @@ enum netdev_priv_flags { * @qdisc: Root qdisc from userspace point of view * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one + * @xdp_bulkq: XDP device bulk queue + * @xps_cpus_map: all CPUs map for XPS device + * @xps_rxqs_map: all RXQs map for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -3548,7 +3562,7 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, } /** - * netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p + * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p * @n: CPU/Rx queue index * @src1p: the first CPUs/Rx queues mask pointer * @src2p: the second CPUs/Rx queues mask pointer -- cgit v1.2.3 From 8f486296459c084b106d907414540301bd9485fd Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 14 Feb 2020 18:57:43 +0800 Subject: ASoC: dapm: remove snd_soc_dapm_put_enum_double_locked Reverts commit 839284e79482 ("ASoC: dapm: add snd_soc_dapm_put_enum_double_locked"). Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20200214105744.82258-3-tzungbi@google.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 2a306c6f3fbc..1b6afbc1a4ed 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -392,8 +392,6 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_put_enum_double_locked(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, -- cgit v1.2.3 From 807b9515b7d044cf77df31f1af9d842a76ecd5cb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:09:00 -0800 Subject: scsi: Revert "target: iscsi: Wait for all commands to finish before freeing a session" Since commit e9d3009cb936 introduced a regression and since the fix for that regression was not perfect, revert this commit. Link: https://marc.info/?l=target-devel&m=158157054906195 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/iscsi_proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index 533f56733ba8..b71b5c4f418c 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -627,7 +627,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 -- cgit v1.2.3 From 7151affeef8d527f50b4b68a871fd28bd660023f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:21 +0000 Subject: net: export netdev_next_lower_dev_rcu() netdev_next_lower_dev_rcu() will be used to implement a function, which is to walk all lower interfaces. There are already functions that they walk their lower interface. (netdev_walk_all_lower_dev_rcu, netdev_walk_all_lower_dev()). But, there would be cases that couldn't be covered by given netdev_walk_all_lower_dev_{rcu}() function. So, some modules would want to implement own function, which is to walk all lower interfaces. In the next patch, netdev_next_lower_dev_rcu() will be used. In addition, this patch removes two unused prototypes in netdevice.h. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9f1f633235f6..6c3f7032e8d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +#define MAX_NEST_DEV 8 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -4389,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev, ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) -struct net_device *netdev_all_lower_get_next(struct net_device *dev, +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter); - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), -- cgit v1.2.3 From 66256e0b15bd72e1e1c24c4cef4281a95636781c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 11:42:37 -0800 Subject: net/sock.h: fix all kernel-doc warnings Fix all kernel-doc warnings for . Fixes these warnings: ../include/net/sock.h:232: warning: Function parameter or member 'skc_addrpair' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_portpair' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_ipv6only' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_net_refcnt' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_v6_daddr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_v6_rcv_saddr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_cookie' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_listener' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_tw_dr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_rcv_wnd' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_tw_rcv_nxt' not described in 'sock_common' ../include/net/sock.h:498: warning: Function parameter or member 'sk_rx_skb_cache' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_wq_raw' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'tcp_rtx_queue' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_tx_skb_cache' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_route_forced_caps' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_txtime_report_errors' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_validate_xmit_skb' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_bpf_storage' not described in 'sock' ../include/net/sock.h:2024: warning: No description found for return value of 'sk_wmem_alloc_get' ../include/net/sock.h:2035: warning: No description found for return value of 'sk_rmem_alloc_get' ../include/net/sock.h:2046: warning: No description found for return value of 'sk_has_allocations' ../include/net/sock.h:2082: warning: No description found for return value of 'skwq_has_sleeper' ../include/net/sock.h:2244: warning: No description found for return value of 'sk_page_frag' ../include/net/sock.h:2444: warning: Function parameter or member 'tcp_rx_skb_cache_key' not described in 'DECLARE_STATIC_KEY_FALSE' ../include/net/sock.h:2444: warning: Excess function parameter 'sk' description in 'DECLARE_STATIC_KEY_FALSE' ../include/net/sock.h:2444: warning: Excess function parameter 'skb' description in 'DECLARE_STATIC_KEY_FALSE' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/sock.h | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 02162b0378f7..328564525526 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair; * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr * @skc_rcv_saddr: Bound local IPv4 addr + * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_dport: placeholder for inet_dport/tw_dport * @skc_num: placeholder for inet_num/tw_num + * @skc_portpair: __u32 union of @skc_dport & @skc_num * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_reuseport: %SO_REUSEPORT setting + * @skc_ipv6only: socket is IPV6 only + * @skc_net_refcnt: socket is using net ref counting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket + * @skc_v6_daddr: IPV6 destination address + * @skc_v6_rcv_saddr: IPV6 source address + * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection @@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair; * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings + * @skc_listener: connection request listener socket (aka rsk_listener) + * [union with @skc_flags] + * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row + * [union with @skc_flags] * @skc_incoming_cpu: record/match cpu processing incoming packets + * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) + * [union with @skc_incoming_cpu] + * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number + * [union with @skc_incoming_cpu] * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header @@ -245,6 +260,7 @@ struct bpf_sk_storage; * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy + * @sk_rx_skb_cache: cache copy of recently accessed RX skb * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -265,6 +281,8 @@ struct bpf_sk_storage; * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_route_forced_caps: static, forced route capabilities + * (set in tcp_init_sock()) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -303,6 +321,8 @@ struct bpf_sk_storage; * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit + * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] + * @sk_tx_skb_cache: cache copy of recently accessed TX skb * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup @@ -313,11 +333,14 @@ struct bpf_sk_storage; * @sk_write_space: callback to indicate there is bf sending space available * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog + * @sk_validate_xmit_skb: ptr to an optional validate function * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container + * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME + * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags */ struct sock { @@ -393,7 +416,9 @@ struct sock { struct sk_filter __rcu *sk_filter; union { struct socket_wq __rcu *sk_wq; + /* private: */ struct socket_wq *sk_wq_raw; + /* public: */ }; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; @@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro * sk_wmem_alloc_get - returns write allocations * @sk: socket * - * Returns sk_wmem_alloc minus initial offset of one + * Return: sk_wmem_alloc minus initial offset of one */ static inline int sk_wmem_alloc_get(const struct sock *sk) { @@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk) * sk_rmem_alloc_get - returns read allocations * @sk: socket * - * Returns sk_rmem_alloc + * Return: sk_rmem_alloc */ static inline int sk_rmem_alloc_get(const struct sock *sk) { @@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk) * sk_has_allocations - check if allocations are outstanding * @sk: socket * - * Returns true if socket has write or read allocations + * Return: true if socket has write or read allocations */ static inline bool sk_has_allocations(const struct sock *sk) { @@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk) * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * - * Returns true if socket_wq has waiting processes + * Return: true if socket_wq has waiting processes * * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. @@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest * inside other socket operations and end up recursing into sk_page_frag() * while it's already in use. + * + * Return: a per task page_frag if context allows that, + * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { @@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ -DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); -- cgit v1.2.3 From d2f273f0a9205257b91af1d3d461ee29688c2f24 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 15:34:07 -0800 Subject: skbuff.h: fix all kernel-doc warnings Fix all kernel-doc warnings in . Fixes these warnings: ../include/linux/skbuff.h:890: warning: Function parameter or member 'list' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'dev_scratch' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'ip_defrag_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'skb_mstamp_ns' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__cloned_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'head_frag' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__pkt_type_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'encapsulation' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'encap_hdr_csum' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_valid' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__pkt_vlan_present_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'vlan_present' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_complete_sw' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_level' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'inner_protocol_type' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'remcsum_offload' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'sender_cpu' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'reserved_tailroom' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'inner_ipproto' not described in 'sk_buff' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca8806b69388..5b50278c4bc8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t; * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point + * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp + * @list: queue head * @sk: Socket we are owned by + * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in + * fragmentation management * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm @@ -632,6 +638,9 @@ typedef unsigned char *sk_buff_data_t; * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @inner_protocol_type: whether the inner protocol is + * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO + * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device @@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @head_frag: skb was allocated from page fragments, + * not allocated by kmalloc() or vmalloc(). * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) @@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t; * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @encapsulation: indicates the inner headers in the skbuff are valid + * @encap_hdr_csum: software checksum is needed + * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL + * @csum_complete_sw: checksum was completed by software + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from + * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking * @mark: Generic packet mark + * @reserved_tailroom: (aka @mark) number of bytes of free space available + * at the tail of an sk_buff + * @vlan_present: VLAN tag is present * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) + * @inner_ipproto: (aka @inner_protocol) stores ipproto when + * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -750,7 +774,9 @@ struct sk_buff { #endif #define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + /* private: */ __u8 __cloned_offset[0]; + /* public: */ __u8 cloned:1, nohdr:1, fclone:2, @@ -775,7 +801,9 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + /* private: */ __u8 __pkt_type_offset[0]; + /* public: */ __u8 pkt_type:3; __u8 ignore_df:1; __u8 nf_trace:1; @@ -798,7 +826,9 @@ struct sk_buff { #define PKT_VLAN_PRESENT_BIT 0 #endif #define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + /* private: */ __u8 __pkt_vlan_present_offset[0]; + /* public: */ __u8 vlan_present:1; __u8 csum_complete_sw:1; __u8 csum_level:2; -- cgit v1.2.3 From 6a757c07e51f80ac34325fcd558490d2d1439e1b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 17:37:07 +0100 Subject: netfilter: conntrack: allow insertion of clashing entries This patch further relaxes the need to drop an skb due to a clash with an existing conntrack entry. Current clash resolution handles the case where the clash occurs between two identical entries (distinct nf_conn objects with same tuples), i.e.: Original Reply existing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 clashing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 ... existing handling will discard the unconfirmed clashing entry and makes skb->_nfct point to the existing one. The skb can then be processed normally just as if the clash would not have existed in the first place. For other clashes, the skb needs to be dropped. This frequently happens with DNS resolvers that send A and AAAA queries back-to-back when NAT rules are present that cause packets to get different DNAT transformations applied, for example: -m statistics --mode random ... -j DNAT --dnat-to 10.0.0.6:5353 -m statistics --mode random ... -j DNAT --dnat-to 10.0.0.7:5353 In this case the A or AAAA query is dropped which incurs a costly delay during name resolution. This patch also allows this collision type: Original Reply existing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 clashing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.7:5353 In this case, clash is in original direction -- the reply direction is still unique. The change makes it so that when the 2nd colliding packet is received, the clashing conntrack is tagged with new IPS_NAT_CLASH_BIT, gets a fixed 1 second timeout and is inserted in the reply direction only. The entry is hidden from 'conntrack -L', it will time out quickly and it can be early dropped because it will never progress to the ASSURED state. To avoid special-casing the delete code path to special case the ORIGINAL hlist_nulls node, a new helper, "hlist_nulls_add_fake", is added so hlist_nulls_del() will work. Example: CPU A: CPU B: 1. 10.2.3.4:42 -> 10.8.8.8:53 (A) 2. 10.2.3.4:42 -> 10.8.8.8:53 (AAAA) 3. Apply DNAT, reply changed to 10.0.0.6 4. 10.2.3.4:42 -> 10.8.8.8:53 (AAAA) 5. Apply DNAT, reply changed to 10.0.0.7 6. confirm/commit to conntrack table, no collisions 7. commit clashing entry Reply comes in: 10.2.3.4:42 <- 10.0.0.6:5353 (A) -> Finds a conntrack, DNAT is reversed & packet forwarded to 10.2.3.4:42 10.2.3.4:42 <- 10.0.0.7:5353 (AAAA) -> Finds a conntrack, DNAT is reversed & packet forwarded to 10.2.3.4:42 The conntrack entry is deleted from table, as it has the NAT_CLASH bit set. In case of a retransmit from ORIGINAL dir, all further packets will get the DNAT transformation to 10.0.0.6. I tried to come up with other solutions but they all have worse problems. Alternatives considered were: 1. Confirm ct entries at allocation time, not in postrouting. a. will cause uneccesarry work when the skb that creates the conntrack is dropped by ruleset. b. in case nat is applied, ct entry would need to be moved in the table, which requires another spinlock pair to be taken. c. breaks the 'unconfirmed entry is private to cpu' assumption: we would need to guard all nfct->ext allocation requests with ct->lock spinlock. 2. Make the unconfirmed list a hash table instead of a pcpu list. Shares drawback c) of the first alternative. 3. Document this is expected and force users to rearrange their ruleset (e.g. by using "-m cluster" instead of "-m statistics"). nft has the 'jhash' expression which can be used instead of 'numgen'. Major drawback: doesn't fix what I consider a bug, not very realistic and I believe its reasonable to have the existing rulesets to 'just work'. 4. Document this is expected and force users to steer problematic packets to the same CPU -- this would serialize the "allocate new conntrack entry/nat table evaluation/perform nat/confirm entry", so no race can occur. Similar drawback to 3. Another advantage of this patch compared to 1) and 2) is that there are no changes to the hot path; things are handled in the udp tracker and the clash resolution path. Cc: rcu@vger.kernel.org Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Jozsef Kadlecsik Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/rculist_nulls.h | 7 +++++++ include/uapi/linux/netfilter/nf_conntrack_common.h | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e5b752027a03..9670b54b484a 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, } } +/* after that hlist_nulls_del will work */ +static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) +{ + n->pprev = &n->next; + n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 336014bf8868..b6f0bb1dc799 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -97,6 +97,15 @@ enum ip_conntrack_status { IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), +#ifdef __KERNEL__ + /* Re-purposed for in-kernel use: + * Tags a conntrack entry that clashed with an existing entry + * on insert. + */ + IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT, + IPS_NAT_CLASH = IPS_UNTRACKED, +#endif + /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), @@ -110,7 +119,8 @@ enum ip_conntrack_status { */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | - IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | + IPS_OFFLOAD), __IPS_MAX_BIT = 15, }; -- cgit v1.2.3 From d970a325561da5e611596cbb06475db3755ce823 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 13 Feb 2020 18:22:55 +0100 Subject: KVM: x86: fix missing prototypes Reported with "make W=1" due to -Wmissing-prototypes. Reported-by: Qian Cai Reviewed-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e89eb67356cb..7944ad6ac10b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -889,6 +889,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_post_init_vm(struct kvm *kvm); +void kvm_arch_pre_destroy_vm(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* -- cgit v1.2.3 From 8a9093c79863b58cc2f9874d7ae788f0d622a596 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 17 Feb 2020 15:38:09 -0500 Subject: net: sched: correct flower port blocking tc flower rules that are based on src or dst port blocking are sometimes ineffective due to uninitialized stack data. __skb_flow_dissect() extracts ports from the skb for tc flower to match against. However, the port dissection is not done when when the FLOW_DIS_IS_FRAGMENT bit is set in key_control->flags. All callers of __skb_flow_dissect(), zero-out the key_control field except for fl_classify() as used by the flower classifier. Thus, the FLOW_DIS_IS_FRAGMENT may be set on entry to __skb_flow_dissect(), since key_control is allocated on the stack and may not be initialized. Since key_basic and key_control are present for all flow keys, let's make sure they are initialized. Fixes: 62230715fd24 ("flow_dissector: do not dissect l4 ports for fragments") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Acked-by: Cong Wang Signed-off-by: Jason Baron Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e9391e877f9a..628383915827 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include struct sk_buff; @@ -348,4 +349,12 @@ struct bpf_flow_dissector { void *data_end; }; +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif -- cgit v1.2.3 From f25975f42f2f8f2a01303054d6a70c7ceb1fcf54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 18 Feb 2020 14:03:34 +0100 Subject: bpf, uapi: Remove text about bpf_redirect_map() giving higher performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The performance of bpf_redirect() is now roughly the same as that of bpf_redirect_map(). However, David Ahern pointed out that the header file has not been updated to reflect this, and still says that a significant performance increase is possible when using bpf_redirect_map(). Remove this text from the bpf_redirect_map() description, and reword the description in bpf_redirect() slightly. Also fix the 'Return' section of the bpf_redirect_map() documentation. Fixes: 1d233886dd90 ("xdp: Use bulking for non-map XDP_REDIRECT and consolidate code paths") Reported-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200218130334.29889-1-toke@redhat.com --- include/uapi/linux/bpf.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f1d74a2bd234..22f235260a3a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1045,9 +1045,9 @@ union bpf_attr { * supports redirection to the egress interface, and accepts no * flag at all. * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. + * The same effect can also be attained with the more generic + * **bpf_redirect_map**\ (), which uses a BPF map to store the + * redirect target instead of providing it directly to the helper. * Return * For XDP, the helper returns **XDP_REDIRECT** on success or * **XDP_ABORTED** on error. For other program types, the values @@ -1611,13 +1611,11 @@ union bpf_attr { * the caller. Any higher bits in the *flags* argument must be * unset. * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. + * See also bpf_redirect(), which only supports redirecting to an + * ifindex, but doesn't require a map to do so. * Return - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * **XDP_REDIRECT** on success, or the value of the two lower bits + * of the **flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description -- cgit v1.2.3 From 13a7e459a41a56d788ab33d825c6205379bbb711 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 14 Jan 2020 09:27:27 +0200 Subject: net/mlx5: DR, Handle reformat capability over sw-steering tables On flow table creation, send the relevant flags according to what the FW currently supports. When FW doesn't support reformat option over SW-steering managed table, the driver shouldn't pass this. Fixes: 988fd6b32d07 ("net/mlx5: DR, Pass table flags at creation to lower layer") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..bfdf41537cf1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1d]; + u8 reserved_at_3[0x4]; + u8 sw_owner_reformat_supported[0x1]; + u8 reserved_at_8[0x18]; + u8 encap_general_header[0x1]; u8 reserved_at_21[0xa]; u8 log_max_packet_reformat_context[0x5]; -- cgit v1.2.3 From 85c46b78da58398be1c5166f55063c0512decd39 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:18:42 +0900 Subject: bootconfig: Add bootconfig magic word for indicating bootconfig explicitly Add bootconfig magic word to the end of bootconfig on initrd image for indicating explicitly the bootconfig is there. Also tools/bootconfig treats wrong size or wrong checksum or parse error as an error, because if there is a bootconfig magic word, there must be a bootconfig. The bootconfig magic word is "#BOOTCONFIG\n", 12 bytes word. Thus the block image of the initrd file with bootconfig is as follows. [Initrd][bootconfig][size][csum][#BOOTCONFIG\n] Link: http://lkml.kernel.org/r/158220112263.26565.3944814205960612841.stgit@devnote2 Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 7e18c939663e..d11e183fcb54 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,9 @@ #include #include +#define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" +#define BOOTCONFIG_MAGIC_LEN 12 + /* XBC tree node */ struct xbc_node { u16 next; -- cgit v1.2.3 From 2546287c5fb363a0165933ae2181c92f03e701d0 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 21 Feb 2020 10:07:25 +0800 Subject: genirq/irqdomain: Make sure all irq domain flags are distinct This was noticed when printing debugfs for MSIs on my ARM64 server. The new dstate IRQD_MSI_NOMASK_QUIRK came out surprisingly while it should only be the x86 stuff for the time being... The new MSI quirk flag uses the same bit as IRQ_DOMAIN_NAME_ALLOCATED which is oddly defined as bit 6 for no good reason. Switch it to the non used bit 1. Fixes: 6f1a4891a592 ("x86/apic/msi: Plug non-maskable MSI affinity race") Signed-off-by: Zenghui Yu Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200221020725.2038-1-yuzenghui@huawei.com --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b2d47571ab67..8d062e86d954 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,7 +192,7 @@ enum { IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), /* Irq domain name was allocated in __irq_domain_add() */ - IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), -- cgit v1.2.3 From 63fb9623427fbb44e3782233b6e4714057b76ff2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 Feb 2020 01:46:18 +0100 Subject: ACPI: PM: s2idle: Check fixed wakeup events in acpi_s2idle_wake() Commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") overlooked the fact that fixed events can wake up the system too and broke RTC wakeup from suspend-to-idle as a result. Fix this issue by checking the fixed events in acpi_s2idle_wake() in addition to checking wakeup GPEs and break out of the suspend-to-idle loop if the status bits of any enabled fixed events are set then. Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Reported-and-tested-by: Chris Wilson Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- include/acpi/acpixf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 5867777bb7d0..8e8be989c2a6 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -753,6 +753,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_fixed_event_status_set(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_get_gpe_device(u32 gpe_index, -- cgit v1.2.3 From 595abbaff5db121428247a2e6ab368734472e101 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:50 -0800 Subject: y2038: remove ktime to/from timespec/timeval conversion A couple of helpers are now obsolete and can be removed, so drivers can no longer start using them and instead use y2038-safe interfaces. Link: http://lkml.kernel.org/r/20200110154232.4104492-2-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index b2bb44f87f5a..d1fb05135665 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -66,33 +66,15 @@ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) -/* convert a timespec to ktime_t format: */ -static inline ktime_t timespec_to_ktime(struct timespec ts) -{ - return ktime_set(ts.tv_sec, ts.tv_nsec); -} - /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } -/* convert a timeval to ktime_t format: */ -static inline ktime_t timeval_to_ktime(struct timeval tv) -{ - return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); -} - -/* Map the ktime_t to timespec conversion to ns_to_timespec function */ -#define ktime_to_timespec(kt) ns_to_timespec((kt)) - /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) -/* Map the ktime_t to timeval conversion to ns_to_timeval function */ -#define ktime_to_timeval(kt) ns_to_timeval((kt)) - /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { @@ -215,25 +197,6 @@ static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); -/** - * ktime_to_timespec_cond - convert a ktime_t variable to timespec - * format only if the variable contains data - * @kt: the ktime_t variable to convert - * @ts: the timespec variable to store the result in - * - * Return: %true if there was a successful conversion, %false if kt was 0. - */ -static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, - struct timespec *ts) -{ - if (kt) { - *ts = ktime_to_timespec(kt); - return true; - } else { - return false; - } -} - /** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data -- cgit v1.2.3 From 412c53a680a97cb1ae2c0ab60230e193bee86387 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:54 -0800 Subject: y2038: remove unused time32 interfaces No users remain, so kill these off before we grow new ones. Link: http://lkml.kernel.org/r/20200110154232.4104492-3-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 29 -------- include/linux/time32.h | 154 +----------------------------------------- include/linux/timekeeping32.h | 32 --------- include/linux/types.h | 5 -- 4 files changed, 1 insertion(+), 219 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 11083d84eb23..df2475be134a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -248,15 +248,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -/* - * These functions operate on 32- or 64-bit specs depending on - * COMPAT_USE_64BIT_TIME, hence the void user pointer arguments. - */ -extern int compat_get_timespec(struct timespec *, const void __user *); -extern int compat_put_timespec(const struct timespec *, void __user *); -extern int compat_get_timeval(struct timeval *, const void __user *); -extern int compat_put_timeval(const struct timeval *, void __user *); - struct compat_iovec { compat_uptr_t iov_base; compat_size_t iov_len; @@ -416,26 +407,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginf int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); -static inline int old_timeval32_compare(struct old_timeval32 *lhs, - struct old_timeval32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_usec - rhs->tv_usec; -} - -static inline int old_timespec32_compare(struct old_timespec32 *lhs, - struct old_timespec32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* diff --git a/include/linux/time32.h b/include/linux/time32.h index cad4c3186002..cf9320cd2d0b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -12,8 +12,6 @@ #include #include -#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) - typedef s32 old_time32_t; struct old_timespec32 { @@ -73,162 +71,12 @@ struct __kernel_timex; int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *); int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *); -#if __BITS_PER_LONG == 64 - -/* timespec64 is defined as timespec here */ -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - return *(const struct timespec *)&ts64; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - return *(const struct timespec64 *)&ts; -} - -#else -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - struct timespec ret; - - ret.tv_sec = (time_t)ts64.tv_sec; - ret.tv_nsec = ts64.tv_nsec; - return ret; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - struct timespec64 ret; - - ret.tv_sec = ts.tv_sec; - ret.tv_nsec = ts.tv_nsec; - return ret; -} -#endif - -static inline int timespec_equal(const struct timespec *a, - const struct timespec *b) -{ - return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} - -/* - * lhs < rhs: return <0 - * lhs == rhs: return 0 - * lhs > rhs: return >0 - */ -static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - -/* - * Returns true if the timespec is norm, false if denorm: - */ -static inline bool timespec_valid(const struct timespec *ts) -{ - /* Dates before 1970 are bogus */ - if (ts->tv_sec < 0) - return false; - /* Can't have more nanoseconds then a second */ - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - -/** - * timespec_to_ns - Convert timespec to nanoseconds - * @ts: pointer to the timespec variable to be converted - * - * Returns the scalar nanosecond representation of the timespec - * parameter. - */ -static inline s64 timespec_to_ns(const struct timespec *ts) -{ - return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; -} - /** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -extern struct timespec ns_to_timespec(const s64 nsec); - -/** - * timespec_add_ns - Adds nanoseconds to a timespec - * @a: pointer to timespec to be incremented - * @ns: unsigned nanoseconds value to be added - * - * This must always be inlined because its used from the x86-64 vdso, - * which cannot call other kernel functions. - */ -static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) -{ - a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); - a->tv_nsec = ns; -} - -static inline unsigned long mktime(const unsigned int year, - const unsigned int mon, const unsigned int day, - const unsigned int hour, const unsigned int min, - const unsigned int sec) -{ - return mktime64(year, mon, day, hour, min, sec); -} - -static inline bool timeval_valid(const struct timeval *tv) -{ - /* Dates before 1970 are bogus */ - if (tv->tv_sec < 0) - return false; - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - - return true; -} - -/** - * timeval_to_ns - Convert timeval to nanoseconds - * @ts: pointer to the timeval variable to be converted - * - * Returns the scalar nanosecond representation of the timeval - * parameter. - */ -static inline s64 timeval_to_ns(const struct timeval *tv) -{ - return ((s64) tv->tv_sec * NSEC_PER_SEC) + - tv->tv_usec * NSEC_PER_USEC; -} - -/** - * ns_to_timeval - Convert nanoseconds to timeval + * ns_to_kernel_old_timeval - Convert nanoseconds to timeval * @nsec: the nanoseconds value to be converted * * Returns the timeval representation of the nsec parameter. */ -extern struct timeval ns_to_timeval(const s64 nsec); extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); -/* - * Old names for the 32-bit time_t interfaces, these will be removed - * when everything uses the new names. - */ -#define compat_time_t old_time32_t -#define compat_timeval old_timeval32 -#define compat_timespec old_timespec32 -#define compat_itimerspec old_itimerspec32 -#define ns_to_compat_timeval ns_to_old_timeval32 -#define get_compat_itimerspec64 get_old_itimerspec32 -#define put_compat_itimerspec64 put_old_itimerspec32 -#define compat_get_timespec64 get_old_timespec32 -#define compat_put_timespec64 put_old_timespec32 - #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index cc59cc9e0e84..266017fc9ee9 100644 --- a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -11,36 +11,4 @@ static inline unsigned long get_seconds(void) return ktime_get_real_seconds(); } -static inline void getnstimeofday(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_real_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void ktime_get_ts(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getrawmonotonic(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_raw_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getboottime(struct timespec *ts) -{ - struct timespec64 ts64; - - getboottime64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - #endif diff --git a/include/linux/types.h b/include/linux/types.h index eb870ad42919..d3021c879179 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -65,11 +65,6 @@ typedef __kernel_ssize_t ssize_t; typedef __kernel_ptrdiff_t ptrdiff_t; #endif -#ifndef _TIME_T -#define _TIME_T -typedef __kernel_old_time_t time_t; -#endif - #ifndef _CLOCK_T #define _CLOCK_T typedef __kernel_clock_t clock_t; -- cgit v1.2.3 From c766d1472c70d25ad475cf56042af1652e792b23 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:57 -0800 Subject: y2038: hide timeval/timespec/itimerval/itimerspec types There are no in-kernel users remaining, but there may still be users that include linux/time.h instead of sys/time.h from user space, so leave the types available to user space while hiding them from kernel space. Only the __kernel_old_* versions of these types remain now. Link: http://lkml.kernel.org/r/20200110154232.4104492-4-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/posix_types.h | 2 ++ include/uapi/linux/time.h | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h index 2f9c80595ba7..b5f7594eee7a 100644 --- a/include/uapi/asm-generic/posix_types.h +++ b/include/uapi/asm-generic/posix_types.h @@ -87,7 +87,9 @@ typedef struct { typedef __kernel_long_t __kernel_off_t; typedef long long __kernel_loff_t; typedef __kernel_long_t __kernel_old_time_t; +#ifndef __KERNEL__ typedef __kernel_long_t __kernel_time_t; +#endif typedef long long __kernel_time64_t; typedef __kernel_long_t __kernel_clock_t; typedef int __kernel_timer_t; diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index a655aa28dc6e..4f4b6e48e01c 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -5,6 +5,7 @@ #include #include +#ifndef __KERNEL__ #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC struct timespec { @@ -18,6 +19,17 @@ struct timeval { __kernel_suseconds_t tv_usec; /* microseconds */ }; +struct itimerspec { + struct timespec it_interval;/* timer period */ + struct timespec it_value; /* timer expiration */ +}; + +struct itimerval { + struct timeval it_interval;/* timer interval */ + struct timeval it_value; /* current value */ +}; +#endif + struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ int tz_dsttime; /* type of dst correction */ @@ -31,16 +43,6 @@ struct timezone { #define ITIMER_VIRTUAL 1 #define ITIMER_PROF 2 -struct itimerspec { - struct timespec it_interval; /* timer period */ - struct timespec it_value; /* timer expiration */ -}; - -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ -- cgit v1.2.3 From 467d12f5c7842896d2de3ced74e4147ee29e97c8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 20 Feb 2020 20:04:03 -0800 Subject: include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap QEMU has a funny new build error message when I use the upstream kernel headers: CC block/file-posix.o In file included from /home/cborntra/REPOS/qemu/include/qemu/timer.h:4, from /home/cborntra/REPOS/qemu/include/qemu/timed-average.h:29, from /home/cborntra/REPOS/qemu/include/block/accounting.h:28, from /home/cborntra/REPOS/qemu/include/block/block_int.h:27, from /home/cborntra/REPOS/qemu/block/file-posix.c:30: /usr/include/linux/swab.h: In function `__swab': /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:34: error: "sizeof" is not defined, evaluates to 0 [-Werror=undef] 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^~~~~~ /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:41: error: missing binary operator before token "(" 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^ cc1: all warnings being treated as errors make: *** [/home/cborntra/REPOS/qemu/rules.mak:69: block/file-posix.o] Error 1 rm tests/qemu-iotests/socket_scm_helper.o This was triggered by commit d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h"). That patch is doing #include but it uses BITS_PER_LONG. The kernel file asm/bitsperlong.h provide only __BITS_PER_LONG. Let us use the __ variant in swap.h Link: http://lkml.kernel.org/r/20200213142147.17604-1-borntraeger@de.ibm.com Fixes: d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h") Signed-off-by: Christian Borntraeger Cc: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Cc: Torsten Hilbrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/swab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index fa7f97da5b76..7272f85d6d6a 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -135,9 +135,9 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) static __always_inline unsigned long __swab(const unsigned long y) { -#if BITS_PER_LONG == 64 +#if __BITS_PER_LONG == 64 return __swab64(y); -#else /* BITS_PER_LONG == 32 */ +#else /* __BITS_PER_LONG == 32 */ return __swab32(y); #endif } -- cgit v1.2.3 From f66ee0410b1c3481ee75e5db9b34547b4d582465 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 11 Feb 2020 23:20:43 +0100 Subject: netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports In the case of huge hash:* types of sets, due to the single spinlock of a set the processing of the whole set under spinlock protection could take too long. There were four places where the whole hash table of the set was processed from bucket to bucket under holding the spinlock: - During resizing a set, the original set was locked to exclude kernel side add/del element operations (userspace add/del is excluded by the nfnetlink mutex). The original set is actually just read during the resize, so the spinlocking is replaced with rcu locking of regions. However, thus there can be parallel kernel side add/del of entries. In order not to loose those operations a backlog is added and replayed after the successful resize. - Garbage collection of timed out entries was also protected by the spinlock. In order not to lock too long, region locking is introduced and a single region is processed in one gc go. Also, the simple timer based gc running is replaced with a workqueue based solution. The internal book-keeping (number of elements, size of extensions) is moved to region level due to the region locking. - Adding elements: when the max number of the elements is reached, the gc was called to evict the timed out entries. The new approach is that the gc is called just for the matching region, assuming that if the region (proportionally) seems to be full, then the whole set does. We could scan the other regions to check every entry under rcu locking, but for huge sets it'd mean a slowdown at adding elements. - Listing the set header data: when the set was defined with timeout support, the garbage collector was called to clean up timed out entries to get the correct element numbers and set size values. Now the set is scanned to check non-timed out entries, without actually calling the gc for the whole set. Thanks to Florian Westphal for helping me to solve the SOFTIRQ-safe -> SOFTIRQ-unsafe lock order issues during working on the patch. Reported-by: syzbot+4b0e9d4ff3cf117837e5@syzkaller.appspotmail.com Reported-by: syzbot+c27b8d5010f45c666ed1@syzkaller.appspotmail.com Reported-by: syzbot+68a806795ac89df3aa1c@syzkaller.appspotmail.com Fixes: 23c42a403a9c ("netfilter: ipset: Introduction of new commands and protocol version 7") Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 908d38dbcb91..5448c8b443db 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -121,6 +121,7 @@ struct ip_set_ext { u32 timeout; u8 packets_op; u8 bytes_op; + bool target; }; struct ip_set; @@ -187,6 +188,14 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Region-locking is used */ + bool region_lock; +}; + +struct ip_set_region { + spinlock_t lock; /* Region lock */ + size_t ext_size; /* Size of the dynamic extensions */ + u32 elements; /* Number of elements vs timeout */ }; /* The core set type structure */ @@ -501,7 +510,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, } #define IP_SET_INIT_KEXT(skb, opt, set) \ - { .bytes = (skb)->len, .packets = 1, \ + { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } #define IP_SET_INIT_UEXT(set) \ -- cgit v1.2.3 From c780e86dd48ef6467a1146cf7d0fe1e05a635039 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 6 Feb 2020 15:28:12 +0100 Subject: blktrace: Protect q->blk_trace with RCU KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Tested-by: Ming Lei Reviewed-by: Bart Van Assche Reported-by: Tristan Madani Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- include/linux/blktrace_api.h | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 053ea4b51988..10455b2bbbb4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -524,7 +524,7 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; struct mutex blk_trace_mutex; #endif /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7bb2d8de9f30..3b6ff5902edc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f **/ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f static inline bool blk_trace_note_message_enabled(struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; - if (likely(!bt)) - return false; - return bt->act_mask & BLK_TC_NOTIFY; + struct blk_trace *bt; + bool ret; + + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + ret = bt && (bt->act_mask & BLK_TC_NOTIFY); + rcu_read_unlock(); + return ret; } extern void blk_add_driver_data(struct request_queue *q, struct request *rq, -- cgit v1.2.3 From a8e41f6033a0c5633d55d6e35993c9e2005d872f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Feb 2020 18:05:35 +0800 Subject: icmp: allow icmpv6_ndo_send to work with CONFIG_IPV6=n The icmpv6_send function has long had a static inline implementation with an empty body for CONFIG_IPV6=n, so that code calling it doesn't need to be ifdef'd. The new icmpv6_ndo_send function, which is intended for drivers as a drop-in replacement with an identical function signature, should follow the same pattern. Without this patch, drivers that used to work with CONFIG_IPV6=n now result in a linker error. Cc: Chen Zhou Reported-by: Hulk Robot Fixes: 0b41713b6066 ("icmp: introduce helper for nat'd source address in network device context") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 93338fd54af8..33d379602314 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -22,19 +22,23 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); +#if IS_ENABLED(CONFIG_NF_NAT) +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); +#else +#define icmpv6_ndo_send icmpv6_send +#endif + #else static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { - } -#endif -#if IS_ENABLED(CONFIG_NF_NAT) -void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); -#else -#define icmpv6_ndo_send icmpv6_send +static inline void icmpv6_ndo_send(struct sk_buff *skb, + u8 type, u8 code, __u32 info) +{ +} #endif extern int icmpv6_init(void); -- cgit v1.2.3 From fcd07f9adc7dacc2532695cf9dd2284d49e716ff Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 28 Feb 2020 09:49:41 +0100 Subject: KVM: let declaration of kvm_get_running_vcpus match implementation Sparse notices that declaration and implementation do not match: arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: warning: incorrect type in return expression (different address spaces) arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: expected struct kvm_vcpu [noderef] ** arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: got struct kvm_vcpu *[noderef] * Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7944ad6ac10b..bcb9b2ac0791 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1344,7 +1344,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ struct kvm_vcpu *kvm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS bool kvm_arch_has_irq_bypass(void); -- cgit v1.2.3