From 1680ac7a5ad578d7acf819912557ceea4b4a5a88 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 7 Jul 2023 16:18:29 -0700 Subject: Input: gameport - use IS_REACHABLE() instead of open-coding it Replace an open-coded preprocessor conditional with an equivalent helper. Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/ZKYLLmsdCH0Gp7TO@google.com Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 0a221e768ea4..07e370113b2b 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -63,7 +63,7 @@ struct gameport_driver { int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode); void gameport_close(struct gameport *gameport); -#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE)) +#if IS_REACHABLE(CONFIG_GAMEPORT) void __gameport_register_port(struct gameport *gameport, struct module *owner); /* use a define to avoid include chaining to get THIS_MODULE */ -- cgit v1.2.3 From 8ce49c2a2aa53afde9a20a8ce02b069d3b262af0 Mon Sep 17 00:00:00 2001 From: Deepak Kumar Singh Date: Fri, 7 Jul 2023 03:11:36 +0530 Subject: rpmsg: core: Add signal API support Some transports like Glink support the state notifications between clients using flow control signals similar to serial protocol signals. Local glink client drivers can send and receive flow control status to glink clients running on remote processors. Add APIs to support sending and receiving of flow control status by rpmsg clients. Signed-off-by: Deepak Kumar Singh Signed-off-by: Sarannya S Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/1688679698-31274-2-git-send-email-quic_sarannya@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 523c98b96cb4..90d8e4475f80 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -64,12 +64,14 @@ struct rpmsg_device { }; typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @flow_cb: remote flow control callback handler * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use @@ -92,6 +94,7 @@ struct rpmsg_endpoint { struct rpmsg_device *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + rpmsg_flowcontrol_cb_t flow_cb; struct mutex cb_lock; u32 addr; void *priv; @@ -106,6 +109,7 @@ struct rpmsg_endpoint { * @probe: invoked when a matching rpmsg channel (i.e. device) is found * @remove: invoked when the rpmsg channel is removed * @callback: invoked when an inbound message is received on the channel + * @flowcontrol: invoked when remote side flow control request is received */ struct rpmsg_driver { struct device_driver drv; @@ -113,6 +117,7 @@ struct rpmsg_driver { int (*probe)(struct rpmsg_device *dev); void (*remove)(struct rpmsg_device *dev); int (*callback)(struct rpmsg_device *, void *, int, void *, u32); + int (*flowcontrol)(struct rpmsg_device *, void *, bool); }; static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) @@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept); +int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst); + #else static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev, @@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept) return -ENXIO; } +static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ -- cgit v1.2.3 From f247f08da0ce822de0d6b2feec811dd6d4d599ce Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Fri, 24 Feb 2023 13:17:06 -0800 Subject: remoteproc: core: Export the rproc coredump APIs The remoteproc coredump APIs are currently only part of the internal remoteproc header. This prevents the remoteproc platform drivers from using these APIs when needed. This change moves the rproc_coredump() and rproc_coredump_cleanup() APIs to the linux header and marks them as exported symbols. Signed-off-by: Siddharth Gupta Signed-off-by: Gokul krishna Krishnakumar Link: https://lore.kernel.org/r/20230224211707.30916-2-quic_gokukris@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index fe8978eb69f1..b4795698d8c2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); + +/* from remoteproc_coredump.c */ +void rproc_coredump_cleanup(struct rproc *rproc); +void rproc_coredump(struct rproc *rproc); void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, -- cgit v1.2.3 From b4f78ff746ec5274fffa92fa2a4dc531360b5016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Jul 2023 22:56:14 +0200 Subject: pwm: Use a consistent name for pwm_chip pointers in the core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most variables of type struct pwm_chip * are named "chip", there are only three outliers called "pc". Change these three to "chip", too, for consistency. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 04ae1d9073a7..d2f9f690a9c1 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -298,7 +298,7 @@ struct pwm_chip { int base; unsigned int npwm; - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, + struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; @@ -395,9 +395,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); -struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args); -struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); -- cgit v1.2.3 From 9e70a5e109a4a23367810de09be826c52d27ee2f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 17 Jul 2023 21:52:06 +0206 Subject: printk: Add per-console suspended state Currently the global @console_suspended is used to determine if consoles are in a suspended state. Its primary purpose is to allow usage of the console_lock when suspended without causing console printing. It is synchronized by the console_lock. Rather than relying on the console_lock to determine suspended state, make it an official per-console state that is set within console->flags. This allows the state to be queried via SRCU. Remove @console_suspended. Console printing will still be avoided when suspended because console_is_usable() returns false when the new suspended flag is set for that console. Signed-off-by: John Ogness Reviewed-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230717194607.145135-7-john.ogness@linutronix.de --- include/linux/console.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index d3195664baa5..7de11c763eb3 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -154,6 +154,8 @@ static inline int con_debug_leave(void) * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. + * @CON_SUSPENDED: Indicates if a console is suspended. If true, the + * printing callbacks must not be called. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -163,6 +165,7 @@ enum cons_flags { CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), + CON_SUSPENDED = BIT(7), }; /** -- cgit v1.2.3 From 481461f5109919babbb393d6f68002936b8e2493 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Jul 2023 19:15:54 +0900 Subject: linux/export.h: make independent of CONFIG_MODULES Currently, all files with EXPORT_SYMBOL() are rebuilt when CONFIG_MODULES is flipped due to depending on CONFIG_MODULES. Now that modpost can make a final decision about export symbols, does not need to make EXPORT_SYMBOL() no-op. Instead, modpost can skip emitting KSYMTAB when CONFIG_MODULES is unset. This commit will reduce the number of recompilation when CONFIG_MODULES is toggled. Signed-off-by: Masahiro Yamada --- include/linux/export.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index beed8387e0a4..9911508a9604 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -50,7 +50,7 @@ extern struct module __this_module; __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous -#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS) +#if defined(__DISABLE_EXPORTS) /* * Allow symbol exports to be disabled completely so that C code may @@ -75,7 +75,7 @@ extern struct module __this_module; __ADDRESSABLE(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) -#endif /* CONFIG_MODULES */ +#endif #ifdef DEFAULT_SYMBOL_NAMESPACE #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) -- cgit v1.2.3 From db2d6038c5e795cab4f0a8d3e86b4f7e33338629 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 15 Jul 2023 09:53:25 +0200 Subject: kernel/reboot: Add device to sys_off_handler If the dev is known (e.g. a devm-based sys_off_handler is used), it can be passed to the handler's callback to have it available there. Otherwise, cb_data might be set to the dev in most of the cases. Reviewed-by: Dmitry Osipenko Signed-off-by: Benjamin Bara Link: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v7-3-18699d5dcd76@skidata.com Signed-off-by: Lee Jones --- include/linux/reboot.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 2b6bb593be5b..c4cc3b89ced1 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -129,11 +129,14 @@ enum sys_off_mode { * @cb_data: User's callback data. * @cmd: Command string. Currently used only by the sys-off restart mode, * NULL otherwise. + * @dev: Device of the sys-off handler. Only if known (devm_register_*), + * NULL otherwise. */ struct sys_off_data { int mode; void *cb_data; const char *cmd; + struct device *dev; }; struct sys_off_handler * -- cgit v1.2.3 From 687fe7dfb736b03ab820d172ea5dbfc1ec447135 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 23 Jul 2023 22:30:18 -0700 Subject: Input: tca6416-keypad - always expect proper IRQ number in i2c client Remove option having i2c client contain raw gpio number instead of proper IRQ number. There are no users of this facility in mainline and it will allow cleaning up the driver code with regard to wakeup handling, etc. Link: https://lore.kernel.org/r/20230724053024.352054-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/tca6416_keypad.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h index b0d36a9934cc..5cf6f6f82aa7 100644 --- a/include/linux/tca6416_keypad.h +++ b/include/linux/tca6416_keypad.h @@ -25,7 +25,6 @@ struct tca6416_keys_platform_data { unsigned int rep:1; /* enable input subsystem auto repeat */ uint16_t pinmask; uint16_t invert; - int irq_is_gpio; int use_polling; /* use polling if Interrupt is not connected*/ }; #endif -- cgit v1.2.3 From 63b93099359eca37c11e4d5db5ea2c3a375f6026 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:46 +0300 Subject: ata: libata: fix parameter type of ata_deadline() ata_deadline() passes its 'unsigned long timeout_msecs' parameter verbatim to msecs_to_jiffies() which takes just 'unsigned int' -- eliminate unneeded implicit cast... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 820f7a3a2749..d9edb3d62a42 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1876,7 +1876,7 @@ static inline int ata_check_ready(u8 status) } static inline unsigned long ata_deadline(unsigned long from_jiffies, - unsigned long timeout_msecs) + unsigned int timeout_msecs) { return from_jiffies + msecs_to_jiffies(timeout_msecs); } -- cgit v1.2.3 From 84abed36d7de7145d393f9d5d05b36717e0cb49d Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:47 +0300 Subject: ata: libata-core: fix parameter types of ata_wait_register() ata_wait_register() passes its 'unsigned long {interval|timeout}' params verbatim to ata_{msleep|deadline}() that just take 'unsigned int' param for the time intervals in ms -- eliminate unneeded implicit casts... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index d9edb3d62a42..4772f64af734 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1116,7 +1116,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap) extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, - u32 val, unsigned long interval, unsigned long timeout); + u32 val, unsigned int interval, unsigned int timeout); extern int atapi_cmd_type(u8 opcode); extern unsigned int ata_pack_xfermask(unsigned int pio_mask, unsigned int mwdma_mask, -- cgit v1.2.3 From d14d41cc5aaef138face9d5a145b460e2b63697a Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:49 +0300 Subject: ata: fix debounce timings type sata_deb_timing_{hotplug|long|normal}[] store 'unsigned long' debounce timeouts in ms, while sata_link_debounce() eventually uses those timeouts by calling ata_{deadline|msleep}( which take just 'unsigned int'. Change the debounce timeout table element's type to 'unsigned int' -- all these timeouts happily fit into 'unsigned int'... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 4772f64af734..8d510fb00591 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1166,11 +1166,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST -extern const unsigned long sata_deb_timing_normal[]; -extern const unsigned long sata_deb_timing_hotplug[]; -extern const unsigned long sata_deb_timing_long[]; +extern const unsigned int sata_deb_timing_normal[]; +extern const unsigned int sata_deb_timing_hotplug[]; +extern const unsigned int sata_deb_timing_long[]; -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { if (ehc->i.flags & ATA_EHI_HOTPLUGGED) @@ -1185,14 +1185,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); extern int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, unsigned long deadline, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); -extern int sata_link_resume(struct ata_link *link, const unsigned long *params, +extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { return NULL; @@ -1212,7 +1212,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } static inline int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) @@ -1222,7 +1222,7 @@ static inline int sata_link_hardreset(struct ata_link *link, return -EOPNOTSUPP; } static inline int sata_link_resume(struct ata_link *link, - const unsigned long *params, + const unsigned int *params, unsigned long deadline) { return -EOPNOTSUPP; @@ -1234,7 +1234,7 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, - const unsigned long *params, unsigned long deadline); + const unsigned int *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -- cgit v1.2.3 From ff8072d589dcff7c1f0345a6ec98b5fc1e9ee2a1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:12 +0200 Subject: ata: libata: remove references to non-existing error_handler() With commit 65a15d6560df ("scsi: ipr: Remove SATA support") all libata drivers now have the error_handler() callback provided, so we can stop checking for non-existing error_handler callback. Signed-off-by: Hannes Reinecke [niklas: fixed review comments, rebased, solved conflicts during rebase, fixed bug that unconditionally dumped all QCs, removed the now unused function ata_dump_status(), removed the now unreachable failure paths in atapi_qc_complete(), removed the non-EH function to request ATAPI sense] Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8d510fb00591..d61e5465076e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1785,7 +1785,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (unlikely(!qc) || !ap->ops->error_handler) + if (unlikely(!qc)) return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | -- cgit v1.2.3 From 43aa43351bb551a7732c1b9f6e8ebb9a6f30b063 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:13 +0200 Subject: ata,scsi: remove ata_sas_port_{start,stop} callbacks Callbacks are empty now, so remove them. Also, remove the call to ap->ops->port_start() in ata_sas_port_init(), as this would otherwise cause a NULL pointer dereference, now when the callback is gone. Signed-off-by: Hannes Reinecke [niklas: remove the call to ap->ops->port_start() in ata_sas_port_init()] Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index d61e5465076e..f8f8b558a8be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1244,10 +1244,8 @@ extern struct ata_port *ata_sas_port_alloc(struct ata_host *, extern void ata_sas_async_probe(struct ata_port *ap); extern int ata_sas_sync_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); -extern int ata_sas_port_start(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, -- cgit v1.2.3 From 6c2fe21e08c28bbdb53ada668edea7df1aa9fb1e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:14 +0200 Subject: ata,scsi: remove ata_sas_port_destroy() Is now a wrapper around kfree(), so call it directly. Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f8f8b558a8be..1d1fd16a1492 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1238,7 +1238,6 @@ extern int sata_link_debounce(struct ata_link *link, extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_sas_async_probe(struct ata_port *ap); -- cgit v1.2.3 From 8ac161ea2b3709b789c192de7da31a58aec9d7ee Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:15 +0200 Subject: ata: libata-sata: remove ata_sas_sync_probe() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1d1fd16a1492..d08dde8c8ad2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1241,7 +1241,6 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_sas_async_probe(struct ata_port *ap); -extern int ata_sas_sync_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -- cgit v1.2.3 From a76f1b637ce92c2b8d5da912826079010b11f138 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:17 +0200 Subject: ata,scsi: cleanup __ata_port_probe() Rename __ata_port_probe() to ata_port_probe() and drop the wrapper ata_sas_async_probe(). Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index d08dde8c8ad2..7468a330fc77 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1240,7 +1240,7 @@ extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -extern void ata_sas_async_probe(struct ata_port *ap); +extern void ata_port_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -- cgit v1.2.3 From 541528170a5cb1342c378dfd46b04dfe024dbc7a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:18 +0200 Subject: ata,scsi: remove ata_sas_port_init() ata_sas_port_init() now only contains a single initialization. Move this single initialization to ata_sas_port_alloc(), since: 1) ata_sas_port_alloc() already initializes some of the struct members. 2) ata_sas_port_alloc() is only used by libsas. Suggested-by: John Garry Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7468a330fc77..0980992c54c2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1241,7 +1241,6 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); -extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); -- cgit v1.2.3 From 89329c7384ef56c407269157e30e781f55c3c4d2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:20 +0200 Subject: ata: libata-core: remove ata_bus_probe() Remove ata_bus_probe() as it is unused. Also, remove references to ata_bus_probe and port_disable in Documentation/driver-api/libata.rst, as neither exist anymore. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0980992c54c2..e176d832467d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -344,7 +344,6 @@ enum { ATA_LINK_RESUME_TRIES = 5, /* how hard are we gonna try to probe/recover devices */ - ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, ATA_EH_PMP_TRIES = 5, ATA_EH_PMP_LINK_TRIES = 3, -- cgit v1.2.3 From 6b4f165e0858016f7bb5f360966882a819519f07 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:21 +0200 Subject: ata: libata: remove deprecated EH callbacks Now that all libata drivers have migrated to use the error_handler callback, remove the deprecated phy_reset and eng_timeout callbacks. Also remove references to non-existent functions sata_phy_reset and ata_qc_timeout from Documentation/driver-api/libata.rst. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Sergey Shtylyov Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- include/linux/libata.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index e176d832467d..52d58b13e5ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -975,12 +975,6 @@ struct ata_port_operations { ssize_t (*transmit_led_message)(struct ata_port *ap, u32 state, ssize_t size); - /* - * Obsolete - */ - void (*phy_reset)(struct ata_port *ap); - void (*eng_timeout)(struct ata_port *ap); - /* * ->inherits must be the last field and all the preceding * fields must be pointers. -- cgit v1.2.3 From 54e73cd52250adeba836cd3afef3658b48ae8dc9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 1 Aug 2023 12:58:15 +0200 Subject: virtio: Remove PM #ifdef guards to fix i2c driver A cleanup in the virtio i2c caused a build failure: drivers/i2c/busses/i2c-virtio.c:270:10: error: 'struct virtio_driver' has no member named 'freeze' drivers/i2c/busses/i2c-virtio.c:271:10: error: 'struct virtio_driver' has no member named 'restore' Change the structure definition to allow this cleanup to be applied everywhere. Fixes: 73d546c76235b ("i2c: virtio: Remove #ifdef guards for PM related functions") Signed-off-by: Arnd Bergmann Reviewed-by: Paul Cercueil Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230801105846.3708252-1-arnd@kernel.org Signed-off-by: Andi Shyti --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..7ed071b5ef07 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -184,10 +184,8 @@ struct virtio_driver { void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); void (*config_changed)(struct virtio_device *dev); -#ifdef CONFIG_PM int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); -#endif }; static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) -- cgit v1.2.3 From d890cfc25fe9421ffdff3a9ea678172addb36762 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Aug 2023 09:31:06 +0200 Subject: rtc: ds2404: Convert to GPIO descriptors This converts the DS2404 to use GPIO descriptors instead of hard-coded global GPIO numbers. The platform data can be deleted because there are no in-tree users and it only contained GPIO numbers which are now passed using descriptor tables (or device tree or ACPI). The driver was rewritten to use a state container for the device driver state (struct ds2404 *chip) and pass that around instead of using a global singleton storage for the GPIO handles. When declaring GPIO descriptor tables or other hardware descriptions for the RTC driver, implementers should take care to flag the RESET line as active low, such as by using the GPIOD_ACTIVE_LOW flag in the descriptor table. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20230807-descriptors-rtc-v1-1-ce0f9187576e@linaro.org Signed-off-by: Alexandre Belloni --- include/linux/platform_data/rtc-ds2404.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/platform_data/rtc-ds2404.h (limited to 'include/linux') diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h deleted file mode 100644 index 22c53825528f..000000000000 --- a/include/linux/platform_data/rtc-ds2404.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * ds2404.h - platform data structure for the DS2404 RTC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 Sven Schnelle - */ - -#ifndef __LINUX_DS2404_H -#define __LINUX_DS2404_H - -struct ds2404_platform_data { - - unsigned int gpio_rst; - unsigned int gpio_clk; - unsigned int gpio_dq; -}; -#endif -- cgit v1.2.3 From afb48153220d35f330d0d979792920a31f7d9a81 Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Fri, 28 Jul 2023 17:37:28 +0200 Subject: leds: Provide devm_of_led_get_optional() Add an optional variant of devm_of_led_get(). It behaves the same as devm_of_led_get() except where the LED doesn't exist. In this case, instead of returning -ENOENT, the function returns NULL. Signed-off-by: Jean-Jacques Hiblot Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230728153731.3742339-2-jjhiblot@traphandler.com Signed-off-by: Lee Jones --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 7d428100b42b..8740b4e47f88 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -313,6 +313,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index); +struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, + int index); /** * led_blink_set - set blinking with software fallback -- cgit v1.2.3 From c7d80059b086c4986cd994a1973ec7a5d75f8eea Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Fri, 28 Jul 2023 17:37:29 +0200 Subject: leds: class: Store the color index in struct led_classdev Store the color of the LED so that it is not lost after the LED's name has been composed. This color information can then be exposed to the user space or used by the LED consumer. Signed-off-by: Jean-Jacques Hiblot Link: https://lore.kernel.org/r/20230728153731.3742339-3-jjhiblot@traphandler.com Signed-off-by: Lee Jones --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 8740b4e47f88..aa16dc2a8230 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -100,6 +100,7 @@ struct led_classdev { const char *name; unsigned int brightness; unsigned int max_brightness; + unsigned int color; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From a1342c8027288e345cc5fd16c6800f9d4eb788ed Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:14 +0000 Subject: KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename kvm_arch_flush_remote_tlb() and the associated macro __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB to kvm_arch_flush_remote_tlbs() and __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS respectively. Making the name plural matches kvm_flush_remote_tlbs() and makes it more clear that this function can affect more than one remote TLB. No functional change intended. Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Shaoqin Huang Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-2-rananta@google.com --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..e3f968b38ae9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1479,8 +1479,8 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } -- cgit v1.2.3 From cfb0c08e80120928dda1e951718be135abd49bae Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Fri, 11 Aug 2023 04:51:15 +0000 Subject: KVM: Declare kvm_arch_flush_remote_tlbs() globally There's no reason for the architectures to declare kvm_arch_flush_remote_tlbs() in their own headers. Hence to avoid this duplication, make the declaration global, leaving the architectures to define only __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS as needed. Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Shaoqin Huang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-3-rananta@google.com --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e3f968b38ae9..ade5d4500c2c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1484,6 +1484,8 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA -- cgit v1.2.3 From d4788996051e3c07fadc6d9b214073fcf78810a8 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:18 +0000 Subject: KVM: Allow range-based TLB invalidation from common code Make kvm_flush_remote_tlbs_range() visible in common code and create a default implementation that just invalidates the whole TLB. This paves the way for several future features/cleanups: - Introduction of range-based TLBI on ARM. - Eliminating kvm_arch_flush_remote_tlbs_memslot() - Moving the KVM/x86 TDP MMU to common code. No functional change intended. Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Shaoqin Huang Reviewed-by: Anup Patel Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-6-rananta@google.com --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ade5d4500c2c..89d2614e4b7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1359,6 +1359,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1488,6 +1489,16 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +#endif + #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); -- cgit v1.2.3 From 619b5072443c05cf18c31b2c0320cdb42396d411 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:19 +0000 Subject: KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code Move kvm_arch_flush_remote_tlbs_memslot() to common code and drop "arch_" from the name. kvm_arch_flush_remote_tlbs_memslot() is just a range-based TLB invalidation where the range is defined by the memslot. Now that kvm_flush_remote_tlbs_range() can be called from common code we can just use that and drop a bunch of duplicate code from the arch directories. Note this adds a lockdep assertion for slots_lock being held when calling kvm_flush_remote_tlbs_memslot(), which was previously only asserted on x86. MIPS has calls to kvm_flush_remote_tlbs_memslot(), but they all hold the slots_lock, so the lockdep assertion continues to hold true. Also drop the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT ifdef gating kvm_flush_remote_tlbs_memslot(), since it is no longer necessary. Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Shaoqin Huang Acked-by: Anup Patel Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-7-rananta@google.com --- include/linux/kvm_host.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 89d2614e4b7a..394db2ce11e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1360,6 +1360,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1388,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); -- cgit v1.2.3 From 3e1efe2b67d3d38116ec010968dbcd89d29e4561 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Jul 2023 17:41:44 -0700 Subject: KVM: Wrap kvm_{gfn,hva}_range.pte in a per-action union Wrap kvm_{gfn,hva}_range.pte in a union so that future notifier events can pass event specific information up and down the stack without needing to constantly expand and churn the APIs. Lockless aging of SPTEs will pass around a bitmap, and support for memory attributes will pass around the new attributes for the range. Add a "KVM_NO_ARG" placeholder to simplify handling events without an argument (creating a dummy union variable is midly annoying). Opportunstically drop explicit zero-initialization of the "pte" field, as omitting the field (now a union) has the same effect. Cc: Yu Zhao Link: https://lore.kernel.org/all/CAOUHufagkd2Jk3_HrVoFFptRXM=hX2CV8f+M-dka-hJU4bP8kw@mail.gmail.com Reviewed-by: Oliver Upton Acked-by: Yu Zhao Link: https://lore.kernel.org/r/20230729004144.1054885-1-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..9125d0ab642d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -256,11 +256,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER +union kvm_mmu_notifier_arg { + pte_t pte; +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; - pte_t pte; + union kvm_mmu_notifier_arg arg; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); -- cgit v1.2.3 From 1f8403953f05af591ab72cf749b9b9b837ea9595 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 14 Aug 2023 22:03:39 +0800 Subject: KVM: Remove unused kvm_device_{get,put}() declarations Commit 07f0a7bdec5c ("kvm: destroy emulated devices on VM exit") removed the functions but not these declarations. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230814140339.47732-1-yuehaibing@huawei.com [sean: split to separate patch] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9125d0ab642d..a973a7cb45e0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2152,8 +2152,6 @@ struct kvm_device_ops { int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; -void kvm_device_get(struct kvm_device *dev); -void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); -- cgit v1.2.3 From 458933d33af2cb3663bd8c0080c1efd1f9483db4 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 14 Aug 2023 22:03:39 +0800 Subject: KVM: Remove unused kvm_make_cpus_request_mask() declaration Commit 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") declared but never implemented kvm_make_cpus_request_mask() as kvm_make_vcpus_request_mask() already existed. Note, KVM's APIs are painfully inconsistent, as the inclusive variant uses "vcpus", whereas the exclusive/all variants use "cpus", which is likely what led to the spurious declaration. The "vcpus" terminology is more correct, especially since the helpers will kick _physical_ CPUs by calling kvm_kick_many_cpus(). But that's a cleanup for the future. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230814140339.47732-1-yuehaibing@huawei.com [sean: split to separate patch, call out inconsistent naming] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a973a7cb45e0..199698a5ffa6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except); -bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, - unsigned long *vcpu_bitmap); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 2459f4dfe5529f8b847f452473e2da08a8fc9fe5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 6 Jul 2023 19:39:39 +0800 Subject: mfd: hi655x-pmic: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Link: https://lore.kernel.org/r/20230706113939.1178-7-frank.li@vivo.com Signed-off-by: Lee Jones --- include/linux/mfd/hi655x-pmic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h index 6a012784dd1b..194556851ccf 100644 --- a/include/linux/mfd/hi655x-pmic.h +++ b/include/linux/mfd/hi655x-pmic.h @@ -52,7 +52,6 @@ #define OTMP_D1R_INT_MASK BIT(OTMP_D1R_INT) struct hi655x_pmic { - struct resource *res; struct device *dev; struct regmap *regmap; struct gpio_desc *gpio; -- cgit v1.2.3 From 2dfe293bcde2d302c045d0cd536ccb15f86385d2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Jul 2023 22:37:38 +0800 Subject: mfd: db8500-prcmu: Remove unused inline functions Since commit b0e846248de5 ("mfd: db8500-prcmu: Remove dead code for a non-existing config") these inline helpers also no need any more. Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20230720143738.13996-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/dbx500-prcmu.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index e7a7e70fdb38..dd0fc891b228 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits) #define PRCMU_QOS_ARM_OPP 3 #define PRCMU_QOS_DEFAULT_VALUE -1 -static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) -{ - return 0; -} - -static inline int prcmu_qos_requirement(int prcmu_qos_class) -{ - return 0; -} - static inline int prcmu_qos_add_requirement(int prcmu_qos_class, char *name, s32 value) { @@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) { } -static inline int prcmu_qos_add_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} -static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} - #endif /* __MACH_PRCMU_H */ -- cgit v1.2.3 From 10d3340441bd0db857fc7fcb1733a800acf47a3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2023 11:02:23 +0200 Subject: mfd: rz-mtu3: Link time dependencies The new set of drivers for RZ/G2L MTU3a tries to enable compile-testing the individual client drivers even when the MFD portion is disabled but gets it wrong, causing a link failure when the core is in a loadable module but the other drivers are built-in: x86_64-linux-ld: drivers/pwm/pwm-rz-mtu3.o: in function `rz_mtu3_pwm_apply': pwm-rz-mtu3.c:(.text+0x4bf): undefined reference to `rz_mtu3_8bit_ch_write' x86_64-linux-ld: pwm-rz-mtu3.c:(.text+0x509): undefined reference to `rz_mtu3_disable' arm-linux-gnueabi-ld: drivers/counter/rz-mtu3-cnt.o: in function `rz_mtu3_cascade_counts_enable_get': rz-mtu3-cnt.c:(.text+0xbec): undefined reference to `rz_mtu3_shared_reg_read' It seems better not to add the extra complexity here but instead just use a normal hard dependency, so remove the #else portion in the header along with the "|| COMPILE_TEST". This could also be fixed by having slightly more elaborate Kconfig dependencies or using the cursed 'IS_REACHABLE()' helper, but in practice it's already possible to compile-test all these drivers by enabling the mtd portion. Fixes: 254d3a727421c ("pwm: Add Renesas RZ/G2L MTU3a PWM driver") Fixes: 0be8907359df4 ("counter: Add Renesas RZ/G2L MTU3a counter driver") Fixes: 654c293e1687b ("mfd: Add Renesas RZ/G2L MTU3a core driver") Signed-off-by: Arnd Bergmann Acked-by: Thierry Reding Reviewed-by: Biju Das Link: https://lore.kernel.org/r/20230719090430.1925182-1-arnd@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/rz-mtu3.h | 66 --------------------------------------------- 1 file changed, 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rz-mtu3.h b/include/linux/mfd/rz-mtu3.h index c5173bc06270..8421d49500bf 100644 --- a/include/linux/mfd/rz-mtu3.h +++ b/include/linux/mfd/rz-mtu3.h @@ -151,7 +151,6 @@ struct rz_mtu3 { void *priv_data; }; -#if IS_ENABLED(CONFIG_RZ_MTU3) static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) { mutex_lock(&ch->lock); @@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val); void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val); void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off, u16 pos, u8 val); -#else -static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch) -{ -} - -static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch) -{ -} - -static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch) -{ - return 0; -} - -static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 val) -{ -} - -static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val) -{ -} - -static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, - u16 off, u16 pos, u8 val) -{ -} -#endif #endif /* __MFD_RZ_MTU3_H__ */ -- cgit v1.2.3 From e0d77323824054f15f3137e890f7addc48198a3e Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:27:09 +0800 Subject: mfd: max77686: Remove unused extern declarations max77686_irq_init() and max77686_irq_exit() are not used since commit 6f1c1e71d933 ("mfd: max77686: Convert to use regmap_irq"). And max77686_irq_resume() never be implemented since introduced in commit dae8a969d512 ("mfd: Add Maxim 77686 driver"). Signed-off-by: Yue Haibing Reviewed-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Link: https://lore.kernel.org/r/20230728132709.27052-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/max77686-private.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 3acceeedbaba..ea635d12a741 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -441,8 +441,4 @@ enum max77686_types { TYPE_MAX77802, }; -extern int max77686_irq_init(struct max77686_dev *max77686); -extern void max77686_irq_exit(struct max77686_dev *max77686); -extern int max77686_irq_resume(struct max77686_dev *max77686); - #endif /* __LINUX_MFD_MAX77686_PRIV_H */ -- cgit v1.2.3 From 733e2e9a28e6fa109e51e0b77901552f69df0ef1 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:24:39 +0800 Subject: mfd: ab8500: Remove unused extern declarations commit d28f1db8187d ("mfd: Remove confusing ab8500-i2c file and merge into ab8500-core") left behind this. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230728132439.31568-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/abx500/ab8500.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 302a330c5c84..09fb3c56e7d7 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -382,10 +382,6 @@ struct ab8500_platform_data { struct ab8500_sysctrl_platform_data *sysctrl; }; -extern int ab8500_init(struct ab8500 *ab8500, - enum ab8500_version version); -extern int ab8500_exit(struct ab8500 *ab8500); - extern int ab8500_suspend(struct ab8500 *ab8500); static inline int is_ab8500(struct ab8500 *ab) -- cgit v1.2.3 From 54ab43a957bcb2643c13df5ab71de9dc3f72e5a6 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:28:41 +0800 Subject: mfd: 88pm860x: Remove unused extern declarations commit 260a127bfbeb ("mfd: 88pm860x-i2c: Purge unused functions") left behind this. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230728132841.10648-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/88pm860x.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 473545a2c425..6fa21791fc85 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int pm860x_page_reg_read(struct i2c_client *, int); extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char); extern int pm860x_page_bulk_read(struct i2c_client *, int, int, unsigned char *); -extern int pm860x_page_bulk_write(struct i2c_client *, int, int, - unsigned char *); -extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char, - unsigned char); - #endif /* __LINUX_MFD_88PM860X_H */ -- cgit v1.2.3 From 875386b98857822b77ac7f95bdf367b70af5b78c Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Mon, 21 Aug 2023 18:30:37 +0530 Subject: scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe Introduce infrastructure in the driver to support the processing of unsolicited LS (Link Service) requests. This will involve the utilization of a new pass-up of unsolicited FC-NVMe request IOCB interface. Unsolicited requests will be submitted to the NVMe transport layer through nvme_fc_rcv_ls_req(). Any received LS responses, which are sent using xmt_ls_rsp(), will be forwarded to the firmware through the existing Pass-Through IOCB interface, responsible for sending FC-NVMe Link Service requests and responses. Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230821130045.34850-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- include/linux/nvme-fc-driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4109f1bd6128..f6ef8cf5d774 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - u32 rqstlen; + __le32 rqstlen; void *rspaddr; dma_addr_t rspdma; - u32 rsplen; + __le32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - u16 rsplen; + __le32 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ -- cgit v1.2.3 From 08b8a0440eeec83f8330349f829908858fd52d31 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Mar 2022 15:47:18 -0400 Subject: libceph: add spinlock around osd->o_requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a later patch, we're going to need to search for a request in the rbtree, but taking the o_mutex is inconvenient as we already hold the con mutex at the point where we need it. Add a new spinlock that we take when inserting and erasing entries from the o_requests tree. Search of the rbtree can be done with either the mutex or the spinlock, but insertion and removal requires both. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fb6be72104df..92addef18738 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,7 +29,12 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 -/* a given osd we're communicating with */ +/* + * A given osd we're communicating with. + * + * Note that the o_requests tree can be searched while holding the "lock" mutex + * or the "o_requests_lock" spinlock. Insertion or removal requires both! + */ struct ceph_osd { refcount_t o_ref; struct ceph_osd_client *o_osdc; @@ -37,6 +42,7 @@ struct ceph_osd { int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; + spinlock_t o_requests_lock; struct rb_root o_requests; struct rb_root o_linger_requests; struct rb_root o_backoff_mappings; -- cgit v1.2.3 From a679e50f728648f7b2f3b349e082448abd388038 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Mar 2022 15:23:00 -0400 Subject: libceph: define struct ceph_sparse_extent and add some helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the OSD sends back a sparse read reply, it contains an array of these structures. Define the structure and add a couple of helpers for dealing with them. Also add a place in struct ceph_osd_req_op to store the extent buffer, and code to free it if it's populated when the req is torn down. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 43 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 92addef18738..05da1e755b7b 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,6 +29,17 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 +/* + * A single extent in a SPARSE_READ reply. + * + * Note that these come from the OSD as little-endian values. On BE arches, + * we convert them in-place after receipt. + */ +struct ceph_sparse_extent { + u64 off; + u64 len; +} __packed; + /* * A given osd we're communicating with. * @@ -104,6 +115,8 @@ struct ceph_osd_req_op { u64 offset, length; u64 truncate_size; u32 truncate_seq; + int sparse_ext_cnt; + struct ceph_sparse_extent *sparse_ext; struct ceph_osd_data osd_data; } extent; struct { @@ -510,6 +523,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); + +/* + * How big an extent array should we preallocate for a sparse read? This is + * just a starting value. If we get more than this back from the OSD, the + * receiver will reallocate. + */ +#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 + +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) +{ + return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); +} + extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -564,5 +591,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, struct ceph_watch_item **watchers, u32 *num_watchers); -#endif +/* Find offset into the buffer of the end of the extent map */ +static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) +{ + struct ceph_sparse_extent *ext; + + /* No extents? No data */ + if (op->extent.sparse_ext_cnt == 0) + return 0; + + ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; + + return ext->off + ext->len - op->extent.offset; +} + +#endif -- cgit v1.2.3 From ec3bc567eac12c557a2b99bd0b34b5dff12cab23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 25 Jan 2022 08:26:31 -0500 Subject: libceph: new sparse_read op, support sparse reads on msgr2 crc codepath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for a new sparse_read ceph_connection operation. The idea is that the client driver can define this operation use it to do special handling for incoming reads. The alloc_msg routine will look at the request and determine whether the reply is expected to be sparse. If it is, then we'll dispatch to a different set of state machine states that will repeatedly call the driver's sparse_read op to get length and placement info for reading the extent map, and the extents themselves. This necessitates adding some new field to some other structs: - The msg gets a new bool to track whether it's a sparse_read request. - A new field is added to the cursor to track the amount remaining in the current extent. This is used to cap the read from the socket into the msg_data - Handing a revoke with all of this is particularly difficult, so I've added a new data_len_remain field to the v2 connection info, and then use that to skip that much on a revoke. We may want to expand the use of that to the normal read path as well, just for consistency's sake. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 99c1726be6ee..8a6938fa324e 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -17,6 +17,7 @@ struct ceph_msg; struct ceph_connection; +struct ceph_msg_data_cursor; /* * Ceph defines these callbacks for handling connection events. @@ -70,6 +71,30 @@ struct ceph_connection_operations { int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt); + + /** + * sparse_read: read sparse data + * @con: connection we're reading from + * @cursor: data cursor for reading extents + * @buf: optional buffer to read into + * + * This should be called more than once, each time setting up to + * receive an extent into the current cursor position, and zeroing + * the holes between them. + * + * Returns amount of data to be read (in bytes), 0 if reading is + * complete, or -errno if there was an error. + * + * If @buf is set on a >0 return, then the data should be read into + * the provided buffer. Otherwise, it should be read into the cursor. + * + * The sparse read operation is expected to initialize the cursor + * with a length covering up to the end of the last extent. + */ + int (*sparse_read)(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **buf); + }; /* use format string %s%lld */ @@ -207,6 +232,7 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ + int sr_resid; /* residual sparse_read len */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -251,6 +277,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; + bool sparse_read; int front_alloc_len; struct ceph_msgpool *pool; @@ -395,6 +422,7 @@ struct ceph_connection_v2_info { void *conn_bufs[16]; int conn_buf_cnt; + int data_len_remain; struct kvec in_sign_kvecs[8]; struct kvec out_sign_kvecs[8]; -- cgit v1.2.3 From d396f89db39a2f259e2125ca43b4c31bb65afcad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 24 Mar 2022 13:33:06 -0400 Subject: libceph: add sparse read support to msgr1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 2 new fields to ceph_connection_v1_info to track the necessary info in sparse reads. Skip initializing the cursor for a sparse read. Break out read_partial_message_section into a wrapper around a new read_partial_message_chunk function that doesn't zero out the crc first. Add new helper functions to drive receiving into the destinations provided by the sparse_read state machine. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8a6938fa324e..9fd7255172ad 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -336,6 +336,10 @@ struct ceph_connection_v1_info { int in_base_pos; /* bytes read */ + /* sparse reads */ + struct kvec in_sr_kvec; /* current location to receive into */ + u64 in_sr_len; /* amount of data in this extent */ + /* message in temps */ u8 in_tag; /* protocol control byte */ struct ceph_msg_header in_hdr; -- cgit v1.2.3 From f628d799972799023d32c2542bb2639eb8c4f84e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Feb 2022 11:38:02 -0500 Subject: libceph: add sparse read support to OSD client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have get_reply check for the presence of sparse read ops in the request and set the sparse_read boolean in the msg. That will queue the messenger layer to use the sparse read codepath instead of the normal data receive. Add a new sparse_read operation for the OSD client, driven by its own state machine. The messenger will repeatedly call the sparse_read operation, and it will pass back the necessary info to set up to read the next extent of data, while zero-filling the sparse regions. The state machine will stop at the end of the last extent, and will attach the extent map buffer to the ceph_osd_req_op so that the caller can use it. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 05da1e755b7b..bfa4813590da 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -40,6 +40,36 @@ struct ceph_sparse_extent { u64 len; } __packed; +/* Sparse read state machine state values */ +enum ceph_sparse_read_state { + CEPH_SPARSE_READ_HDR = 0, + CEPH_SPARSE_READ_EXTENTS, + CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA, +}; + +/* + * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of + * 64-bit offset/length pairs, and then all of the actual file data + * concatenated after it (sans holes). + * + * Unfortunately, we don't know how long the extent array is until we've + * started reading the data section of the reply. The caller should send down + * a destination buffer for the array, but we'll alloc one if it's too small + * or if the caller doesn't. + */ +struct ceph_sparse_read { + enum ceph_sparse_read_state sr_state; /* state machine state */ + u64 sr_req_off; /* orig request offset */ + u64 sr_req_len; /* orig request length */ + u64 sr_pos; /* current pos in buffer */ + int sr_index; /* current extent index */ + __le32 sr_datalen; /* length of actual data */ + u32 sr_count; /* extent count in reply */ + int sr_ext_len; /* length of extent array */ + struct ceph_sparse_extent *sr_extent; /* extent array */ +}; + /* * A given osd we're communicating with. * @@ -48,6 +78,7 @@ struct ceph_sparse_extent { */ struct ceph_osd { refcount_t o_ref; + int o_sparse_op_idx; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; @@ -63,6 +94,7 @@ struct ceph_osd { unsigned long lru_ttl; struct list_head o_keepalive_item; struct mutex lock; + struct ceph_sparse_read o_sparse_read; }; #define CEPH_OSD_SLAB_OPS 2 -- cgit v1.2.3 From dee0c5f834605ce9b384ee8b9c7032ffd8db4eca Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 1 Jul 2022 06:30:12 -0400 Subject: libceph: add new iov_iter-based ceph_msg_data_type and ceph_osd_data_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an iov_iter to the unions in ceph_msg_data and ceph_msg_data_cursor. Instead of requiring a list of pages or bvecs, we can just use an iov_iter directly, and avoid extra allocations. We assume that the pages represented by the iter are pinned such that they shouldn't incur page faults, which is the case for the iov_iters created by netfs. While working on this, Al Viro informed me that he was going to change iov_iter_get_pages to auto-advance the iterator as that pattern is more or less required for ITER_PIPE anyway. We emulate that here for now by advancing in the _next op and tracking that amount in the "lastlen" field. In the event that _next is called twice without an intervening _advance, we revert the iov_iter by the remaining lastlen before calling iov_iter_get_pages. Cc: Al Viro Cc: David Howells Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 8 ++++++++ include/linux/ceph/osd_client.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9fd7255172ad..2eaaabbe98cb 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -123,6 +123,7 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ + CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */ }; #ifdef CONFIG_BLOCK @@ -224,6 +225,7 @@ struct ceph_msg_data { bool own_pages; }; struct ceph_pagelist *pagelist; + struct iov_iter iter; }; }; @@ -248,6 +250,10 @@ struct ceph_msg_data_cursor { struct page *page; /* page from list */ size_t offset; /* bytes from list */ }; + struct { + struct iov_iter iov_iter; + unsigned int lastlen; + }; }; }; @@ -605,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, #endif /* CONFIG_BLOCK */ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter); struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bfa4813590da..8f5d2b5bbba2 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -108,6 +108,7 @@ enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ CEPH_OSD_DATA_TYPE_BVECS, + CEPH_OSD_DATA_TYPE_ITER, }; struct ceph_osd_data { @@ -131,6 +132,7 @@ struct ceph_osd_data { struct ceph_bvec_iter bvec_pos; u32 num_bvecs; }; + struct iov_iter iter; }; }; @@ -501,6 +503,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, -- cgit v1.2.3 From 2d332d5bc424404911540006a8bb450fbb96b178 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 27 Jul 2020 10:16:09 -0400 Subject: ceph: fscrypt_auth handling for ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most fscrypt-enabled filesystems store the crypto context in an xattr, but that's problematic for ceph as xatts are governed by the XATTR cap, but we really want the crypto context as part of the AUTH cap. Because of this, the MDS has added two new inode metadata fields: fscrypt_auth and fscrypt_file. The former is used to hold the crypto context, and the latter is used to track the real file size. Parse new fscrypt_auth and fscrypt_file fields in inode traces. For now, we don't use fscrypt_file, but fscrypt_auth is used to hold the fscrypt context. Allow the client to use a setattr request for setting the fscrypt_auth field. Since this is not a standard setattr request from the VFS, we add a new field to __ceph_setattr that carries ceph-specific inode attrs. Have the set_context op do a setattr that sets the fscrypt_auth value, and get_context just return the contents of that field (since it should always be available). Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 49586ff26152..45f8ce61e103 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -359,14 +359,19 @@ enum { extern const char *ceph_mds_op_name(int op); - -#define CEPH_SETATTR_MODE 1 -#define CEPH_SETATTR_UID 2 -#define CEPH_SETATTR_GID 4 -#define CEPH_SETATTR_MTIME 8 -#define CEPH_SETATTR_ATIME 16 -#define CEPH_SETATTR_SIZE 32 -#define CEPH_SETATTR_CTIME 64 +#define CEPH_SETATTR_MODE (1 << 0) +#define CEPH_SETATTR_UID (1 << 1) +#define CEPH_SETATTR_GID (1 << 2) +#define CEPH_SETATTR_MTIME (1 << 3) +#define CEPH_SETATTR_ATIME (1 << 4) +#define CEPH_SETATTR_SIZE (1 << 5) +#define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_MTIME_NOW (1 << 7) +#define CEPH_SETATTR_ATIME_NOW (1 << 8) +#define CEPH_SETATTR_BTIME (1 << 9) +#define CEPH_SETATTR_KILL_SGUID (1 << 10) +#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11) +#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12) /* * Ceph setxattr request flags. -- cgit v1.2.3 From 69dd3b3930f96b624228000921f417fb0919a6ab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 25 Aug 2022 09:31:05 -0400 Subject: libceph: add CEPH_OSD_OP_ASSERT_VER support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...and record the user_version in the reply in a new field in ceph_osd_request, so we can populate the assert_ver appropriately. Shuffle the fields a bit too so that the new field fits in an existing hole on x86_64. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 6 +++++- include/linux/ceph/rados.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 8f5d2b5bbba2..bf9823956758 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -198,6 +198,9 @@ struct ceph_osd_req_op { u32 src_fadvise_flags; struct ceph_osd_data osd_data; } copy_from; + struct { + u64 ver; + } assert_ver; }; }; @@ -252,6 +255,7 @@ struct ceph_osd_request { struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; + bool r_linger; /* don't resend on failure */ struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; @@ -264,9 +268,9 @@ struct ceph_osd_request { struct ceph_snap_context *r_snapc; /* for writes */ struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ - bool r_linger; /* don't resend on failure */ /* internal */ + u64 r_version; /* data version sent in reply */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ ktime_t r_start_latency; /* ktime_t */ diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 43a7a1573b51..73c3efbec36c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -523,6 +523,10 @@ struct ceph_osd_op { struct { __le64 cookie; } __attribute__ ((packed)) notify; + struct { + __le64 unused; + __le64 ver; + } __attribute__ ((packed)) assert_ver; struct { __le64 offset, length; __le64 src_offset; -- cgit v1.2.3 From 781589e40ac5f929f58824c15448e1ba49c3ac32 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 17 Aug 2023 15:55:31 -0700 Subject: rtc: Add support for limited alarm timer offsets Some alarm timers are based on time offsets, not on absolute times. In some situations, the amount of time that can be scheduled in the future is limited. This may result in a refusal to suspend the system, causing substantial battery drain. Some RTC alarm drivers remedy the situation by setting the alarm time to the maximum supported time if a request for an out-of-range timeout is made. This is not really desirable since it may result in unexpected early wakeups. To reduce the impact of this problem, let RTC drivers report the maximum supported alarm timer offset. The code setting alarm timers can then decide if it wants to reject setting alarm timers to a larger value, if it wants to implement recurring alarms until the actually requested alarm time is met, or if it wants to accept the limited alarm time. Only introduce the necessary variable into struct rtc_device. Code to set and use the variable will follow with subsequent patches. Cc: Brian Norris Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20230817225537.4053865-2-linux@roeck-us.net Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 1fd9c6a21ebe..4c0bcbeb1f00 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -146,6 +146,7 @@ struct rtc_device { time64_t range_min; timeu64_t range_max; + timeu64_t alarm_offset_max; time64_t start_secs; time64_t offset_secs; bool set_start_time; -- cgit v1.2.3 From 35d8dbbb25add265a880ab0dc48a229f06b08325 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Aug 2023 20:46:31 +0200 Subject: thermal: core: Drop unused .get_trip_*() callbacks After recent changes in the ACPI thermal driver and in the Intel DTS IOSF thermal driver, all thermal zone drivers are expected to use trip tables for initialization and none of them should implement .get_trip_type(), .get_trip_temp() or .get_trip_hyst() callbacks, so drop these callbacks entirely from the core. Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b449a46766f5..e6bd3b7c9eda 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -62,11 +62,7 @@ struct thermal_zone_device_ops { int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); - int (*get_trip_type) (struct thermal_zone_device *, int, - enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, int *); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); -- cgit v1.2.3 From 8289d810ea85755a9d22f75785806cb34eecd5e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Aug 2023 13:40:06 +0200 Subject: thermal: core: Rework .get_trend() thermal zone callback Passing a struct thermal_trip pointer instead of a trip index to the .get_trend() thermal zone callback allows one of its 2 implementations, the thermal_get_trend() function in the ACPI thermal driver, to be simplified quite a bit, and the other implementation of it in the ti-soc-thermal driver does not even use the relevant callback argument. For this reason, change the .get_trend() thermal zone callback definition and adjust the related code accordingly. Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e6bd3b7c9eda..eb17495c8acc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -53,6 +53,20 @@ enum thermal_notify_event { THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @type: trip point type + * @priv: pointer to driver data associated with this trip + */ +struct thermal_trip { + int temperature; + int hysteresis; + enum thermal_trip_type type; + void *priv; +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -66,26 +80,12 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, int, + int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - * @priv: pointer to driver data associated with this trip - */ -struct thermal_trip { - int temperature; - int hysteresis; - enum thermal_trip_type type; - void *priv; -}; - struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); -- cgit v1.2.3 From 218a06a79d9a98a96ef46bb003d4d8adb0962056 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Tue, 22 Aug 2023 20:48:37 +0800 Subject: cpufreq: Support per-policy performance boost The boost control currently applies to the whole system. However, users may prefer to boost a subset of cores in order to provide prioritized performance to workloads running on the boosted cores. Enable per-policy boost by adding a 'boost' sysfs interface under each policy path. This can be found at: /sys/devices/system/cpu/cpufreq/policy<*>/boost Same to the global boost switch, writing 1/0 to the per-policy 'boost' enables/disables boost on a cpufreq policy respectively. The user view of global and per-policy boost controls should be: 1. Enabling global boost initially enables boost on all policies, and per-policy boost can then be enabled or disabled individually, given that the platform does support so. 2. Disabling global boost makes the per-policy boost interface illegal. Signed-off-by: Jie Zhan Reviewed-by: Wei Xu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 43b363a99215..71d186d6933a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -141,6 +141,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; -- cgit v1.2.3 From 7e9be1124dbe7888907e82cab20164578e3f9ab7 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 29 Aug 2023 19:51:57 +0200 Subject: netfilter: nf_tables: Audit log setelem reset Since set element reset is not integrated into nf_tables' transaction logic, an explicit log call is needed, similar to NFT_MSG_GETOBJ_RESET handling. For the sake of simplicity, catchall element reset will always generate a dedicated log entry. This relieves nf_tables_dump_set() from having to adjust the logged element count depending on whether a catchall element was found or not. Fixes: 079cd633219d7 ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET") Signed-off-by: Phil Sutter Reviewed-by: Richard Guy Briggs Signed-off-by: Pablo Neira Ayuso --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6a3a9e122bb5..192bf03aacc5 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -117,6 +117,7 @@ enum audit_nfcfgop { AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_SETELEM_RESET, AUDIT_NFT_OP_INVALID, }; -- cgit v1.2.3 From ea078ae9108e25fc881c84369f7c03931d22e555 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 29 Aug 2023 19:51:58 +0200 Subject: netfilter: nf_tables: Audit log rule reset Resetting rules' stateful data happens outside of the transaction logic, so 'get' and 'dump' handlers have to emit audit log entries themselves. Fixes: 8daa8fde3fc3f ("netfilter: nf_tables: Introduce NFT_MSG_GETRULE_RESET") Signed-off-by: Phil Sutter Reviewed-by: Richard Guy Briggs Signed-off-by: Pablo Neira Ayuso --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 192bf03aacc5..51b1b7054a23 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -118,6 +118,7 @@ enum audit_nfcfgop { AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, AUDIT_NFT_OP_SETELEM_RESET, + AUDIT_NFT_OP_RULE_RESET, AUDIT_NFT_OP_INVALID, }; -- cgit v1.2.3 From 3af5ae22030cb59fab4fba35f5a2b62f47e14df9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 09:44:40 +0800 Subject: ceph: make members in struct ceph_mds_request_args_ext a union In ceph mainline it will allow to set the btime in the setattr request and just add a 'btime' member in the union 'ceph_mds_request_args' and then bump up the header version to 4. That means the total size of union 'ceph_mds_request_args' will increase sizeof(struct ceph_timespec) bytes, but in kclient it will increase the sizeof(setattr_ext) bytes for each request. Since the MDS will always depend on the header's vesion and front_len members to decode the 'ceph_mds_request_head' struct, at the same time kclient hasn't supported the 'btime' feature yet in setattr request, so it's safe to do this change here. This will save 48 bytes memories for each request. Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure") Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 45f8ce61e103..ae44812eb6d4 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -467,17 +467,19 @@ union ceph_mds_request_args { } __attribute__ ((packed)); union ceph_mds_request_args_ext { - union ceph_mds_request_args old; - struct { - __le32 mode; - __le32 uid; - __le32 gid; - struct ceph_timespec mtime; - struct ceph_timespec atime; - __le64 size, old_size; /* old_size needed by truncate */ - __le32 mask; /* CEPH_SETATTR_* */ - struct ceph_timespec btime; - } __attribute__ ((packed)) setattr_ext; + union { + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; + }; }; #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -- cgit v1.2.3 From ce0d5bd3a6c176f9a3bf867624a07119dd4d0878 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 17:51:59 +0800 Subject: ceph: make num_fwd and num_retry to __u32 The num_fwd in MClientRequestForward is int32_t, while the num_fwd in ceph_mds_request_head is __u8. This is buggy when the num_fwd is larger than 256 it will always be truncate to 0 again. But the client couldn't recoginize this. This will make them to __u32 instead. Because the old cephs will directly copy the raw memories when decoding the reqeust's head, so we need to make sure this kclient will be compatible with old cephs. For newer cephs they will decode the requests depending the version, which will be much simpler and easier to extend new members. Link: https://tracker.ceph.com/issues/62145 Signed-off-by: Xiubo Li Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ae44812eb6d4..5f2301ee88bc 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -486,7 +486,7 @@ union ceph_mds_request_args_ext { #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head_old { +struct ceph_mds_request_head_legacy { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -499,9 +499,9 @@ struct ceph_mds_request_head_old { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 1 +#define CEPH_MDS_REQUEST_HEAD_VERSION 2 -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le16 version; /* struct version */ __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ @@ -515,6 +515,23 @@ struct ceph_mds_request_head { union ceph_mds_request_args_ext args; } __attribute__ ((packed)); +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; + + __le32 ext_num_retry; /* new count retry attempts */ + __le32 ext_num_fwd; /* new count fwd attempts */ +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ -- cgit v1.2.3 From 52e322eda3d475614210efbc0f2793a1da9d367a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Jul 2023 17:47:22 -0700 Subject: KVM: x86/mmu: BUG() in rmap helpers iff CONFIG_BUG_ON_DATA_CORRUPTION=y Introduce KVM_BUG_ON_DATA_CORRUPTION() and use it in the low-level rmap helpers to convert the existing BUG()s to WARN_ON_ONCE() when the kernel is built with CONFIG_BUG_ON_DATA_CORRUPTION=n, i.e. does NOT want to BUG() on corruption of host kernel data structures. Environments that don't have infrastructure to automatically capture crash dumps, i.e. aren't likely to enable CONFIG_BUG_ON_DATA_CORRUPTION=y, are typically better served overall by WARN-and-continue behavior (for the kernel, the VM is dead regardless), as a BUG() while holding mmu_lock all but guarantees the _best_ case scenario is a panic(). Make the BUG()s conditional instead of removing/replacing them entirely as there's a non-zero chance (though by no means a guarantee) that the damage isn't contained to the target VM, e.g. if no rmap is found for a SPTE then KVM may be double-zapping the SPTE, i.e. has already freed the memory the SPTE pointed at and thus KVM is reading/writing memory that KVM no longer owns. Link: https://lore.kernel.org/all/20221129191237.31447-1-mizhang@google.com Suggested-by: Mingwei Zhang Cc: David Matlack Cc: Jim Mattson Reviewed-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230729004722.1056172-13-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1b583f35547e..fb6c6109fdca 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -867,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +/* + * Note, "data corruption" refers to corruption of host kernel data structures, + * not guest data. Guest data corruption, suspected or confirmed, that is tied + * and contained to a single VM should *never* BUG() and potentially panic the + * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure + * is corrupted and that corruption can have a cascading effect to other parts + * of the hosts and/or to other VMs. + */ +#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ +({ \ + bool __ret = !!(cond); \ + \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(__ret); \ + else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ + kvm_vm_bugged(kvm); \ + unlikely(__ret); \ +}) + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 6a86b5b5cd76d2734304a0173f5f01aa8aa2025e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Aug 2023 22:53:52 +0200 Subject: bpf: Annotate bpf_long_memcpy with data_race syzbot reported a data race splat between two processes trying to update the same BPF map value via syscall on different CPUs: BUG: KCSAN: data-race in bpf_percpu_array_update / bpf_percpu_array_update write to 0xffffe8fffe7425d8 of 8 bytes by task 8257 on cpu 1: bpf_long_memcpy include/linux/bpf.h:428 [inline] bpf_obj_memcpy include/linux/bpf.h:441 [inline] copy_map_value_long include/linux/bpf.h:464 [inline] bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380 bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749 bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648 __sys_bpf+0x28a/0x780 __do_sys_bpf kernel/bpf/syscall.c:5241 [inline] __se_sys_bpf kernel/bpf/syscall.c:5239 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffffe8fffe7425d8 of 8 bytes by task 8268 on cpu 0: bpf_long_memcpy include/linux/bpf.h:428 [inline] bpf_obj_memcpy include/linux/bpf.h:441 [inline] copy_map_value_long include/linux/bpf.h:464 [inline] bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380 bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749 bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648 __sys_bpf+0x28a/0x780 __do_sys_bpf kernel/bpf/syscall.c:5241 [inline] __se_sys_bpf kernel/bpf/syscall.c:5239 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0xfffffff000002788 The bpf_long_memcpy is used with 8-byte aligned pointers, power-of-8 size and forced to use long read/writes to try to atomically copy long counters. It is best-effort only and no barriers are here since it _will_ race with concurrent updates from BPF programs. The bpf_long_memcpy() is called from bpf(2) syscall. Marco suggested that the best way to make this known to KCSAN would be to use data_race() annotation. Reported-by: syzbot+97522333291430dd277f@syzkaller.appspotmail.com Suggested-by: Marco Elver Signed-off-by: Daniel Borkmann Acked-by: Marco Elver Link: https://lore.kernel.org/bpf/000000000000d87a7f06040c970c@google.com Link: https://lore.kernel.org/bpf/57628f7a15e20d502247c3b55fceb1cb2b31f266.1693342186.git.daniel@iogearbox.net --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 12596af59c00..024e8b28c34b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -438,7 +438,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) size /= sizeof(long); while (size--) - *ldst++ = *lsrc++; + data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ -- cgit v1.2.3 From 8423be8926aa82cd2e28bba5cc96ccb72c7ce6be Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Thu, 31 Aug 2023 10:03:31 +0200 Subject: ipv6: ignore dst hint for multipath routes Route hints when the nexthop is part of a multipath group causes packets in the same receive batch to be sent to the same nexthop irrespective of the multipath hash of the packet. So, do not extract route hint for packets whose destination is part of a multipath group. A new SKB flag IP6SKB_MULTIPATH is introduced for this purpose, set the flag when route is looked up in fib6_select_path() and use it in ip6_can_use_hint() to check for the existence of the flag. Fixes: 197dbf24e360 ("ipv6: introduce and uses route look hints for list input.") Signed-off-by: Sriram Yagnaraman Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5883551b1ee8..af8a771a053c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -147,6 +147,7 @@ struct inet6_skb_parm { #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 +#define IP6SKB_MULTIPATH 1024 }; #if defined(CONFIG_NET_L3_MASTER_DEV) -- cgit v1.2.3 From 6ad11bc6ed37c6371e9e13619862709b6529b43a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 24 Aug 2023 14:38:08 +0300 Subject: rmap: remove anon_vma_link() nommu stub anon_vma_link() is unused since commit 5beb49305251 ("mm: change anon_vma linking to fix multi-process server scalability issue"). Link: https://lkml.kernel.org/r/cdce9b00c9ab15f6d02eddf40dcad537d3e9676f.1692877089.git.baruch@tkos.co.il Signed-off-by: Baruch Siach Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a3825ce81102..51cc21ebb568 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -479,7 +479,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, #define anon_vma_init() do {} while (0) #define anon_vma_prepare(vma) (0) -#define anon_vma_link(vma) do {} while (0) static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, -- cgit v1.2.3 From 719c5e37e99d2fd588d1c994284d17650a66354c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 1 Sep 2023 06:53:23 +0200 Subject: net: phy: micrel: Correct bit assignments for phy_device flags Previously, the defines for phy_device flags in the Micrel driver were ambiguous in their representation. They were intended to be bit masks but were mistakenly defined as bit positions. This led to the following issues: - MICREL_KSZ8_P1_ERRATA, designated for KSZ88xx switches, overlapped with MICREL_PHY_FXEN and MICREL_PHY_50MHZ_CLK. - Due to this overlap, the code path for MICREL_PHY_FXEN, tailored for the KSZ8041 PHY, was not executed for KSZ88xx PHYs. - Similarly, the code associated with MICREL_PHY_50MHZ_CLK wasn't triggered for KSZ88xx. To rectify this, all three flags have now been explicitly converted to use the `BIT()` macro, ensuring they are defined as bit masks and preventing potential overlaps in the future. Fixes: 49011e0c1555 ("net: phy: micrel: ksz886x/ksz8081: add cabletest support") Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 8bef1ab62bba..322d87255984 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -41,9 +41,9 @@ #define PHY_ID_KSZ9477 0x00221631 /* struct phy_device dev_flags definitions */ -#define MICREL_PHY_50MHZ_CLK 0x00000001 -#define MICREL_PHY_FXEN 0x00000002 -#define MICREL_KSZ8_P1_ERRATA 0x00000003 +#define MICREL_PHY_50MHZ_CLK BIT(0) +#define MICREL_PHY_FXEN BIT(1) +#define MICREL_KSZ8_P1_ERRATA BIT(2) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 0be7592885d7b4c20595c388adc13930b653b847 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:45 +0530 Subject: scsi: qla2xxx: Correct endianness for rqstlen and rsplen rqstlen and rsplen were changed to __le32 to fix sparse warnings: drivers/scsi/qla2xxx/qla_nvme.c:402:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:402:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:402:30: got unsigned short [usertype] rsplen drivers/scsi/qla2xxx/qla_nvme.c:507:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:507:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:507:30: got unsigned int [usertype] rqstlen drivers/scsi/qla2xxx/qla_nvme.c:508:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:508:30: expected restricted __le32 [usertype] rsp_len drivers/scsi/qla2xxx/qla_nvme.c:508:30: got unsigned int [usertype] rsplen Correct the endianness in qla2xxx driver thus avoiding changes in nvme-fc-driver.h. Fixes: 875386b98857 ("scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe") Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- include/linux/nvme-fc-driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f6ef8cf5d774..4109f1bd6128 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - __le32 rqstlen; + u32 rqstlen; void *rspaddr; dma_addr_t rspdma; - __le32 rsplen; + u32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - __le32 rsplen; + u16 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ -- cgit v1.2.3 From cf60ce92358da29f3b1e24005f7b748584b82753 Mon Sep 17 00:00:00 2001 From: Pavel Pisa Date: Mon, 4 Sep 2023 12:00:02 +0200 Subject: of: overlay: Fix of_overlay_fdt_apply prototype when !CONFIG_OF_OVERLAY The of_overlay_fdt_apply has been changed but when CONFIG_OF_OVERLAY support is not configured then old stub prototype is declared by of.h header. Signed-off-by: Pavel Pisa Fixes: 47284862bfc7 ("of: overlay: Extend of_overlay_fdt_apply() to specify the target node") Acked-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20230904100002.7913-1-pisa@cmp.felk.cvut.cz Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ed679819c279..6a9ddf20e79a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1676,8 +1676,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id) +static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, + int *ovcs_id, struct device_node *target_base) { return -ENOTSUPP; } -- cgit v1.2.3 From 9ffa7b92bc768609243d79fb0c0125b9704c7061 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:11:38 +0200 Subject: thermal: core: Clean up headers of thermal zone registration functions For consistency, add a missing thermal_zone_device_register_with_trips() stub for the CONFIG_THERMAL unset case, specify argument names in all of the thermal zone registration and unregistration function headers and make all of them use white space consistently. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 53 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index eb17495c8acc..a30643104f3f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -300,16 +300,24 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); - -void thermal_zone_device_unregister(struct thermal_zone_device *); - -struct thermal_zone_device * -thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); +struct thermal_zone_device *thermal_zone_device_register( + const char *type, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); const char *thermal_zone_device_type(struct thermal_zone_device *tzd); @@ -351,14 +359,27 @@ int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) + const char *type, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) +{ return ERR_PTR(-ENODEV); } + +static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } -static inline void thermal_zone_device_unregister( - struct thermal_zone_device *tz) + +static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.2.3 From d332db8fc1a2dfb4738281b1d6d4ed20115dd9d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:13:35 +0200 Subject: thermal: core: Add function for registering tripless thermal zones Multiple callers of thermal_zone_device_register() don't pass any trips to it and they might use a shortened argument list for that, so add a special function with fewer arguments for this purpose. Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a30643104f3f..1a1433eb9b59 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -317,6 +317,12 @@ struct thermal_zone_device *thermal_zone_device_register_with_trips( const struct thermal_zone_params *tzp, int passive_delay, int polling_delay); +struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp); + void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); @@ -377,6 +383,13 @@ static inline struct thermal_zone_device *thermal_zone_device_register_with_trip int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } +static inline struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp) +{ return ERR_PTR(-ENODEV); } + static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } -- cgit v1.2.3 From edd220b33f479cf9dcda0bfefb2cb8c5902e9885 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:16:29 +0200 Subject: thermal: core: Drop thermal_zone_device_register() There are no more users of thermal_zone_device_register(), so drop it from the core. Note that thermal_zone_device_register_with_trips() may be renamed to thermal_zone_device_register() in the future, but only after a grace period allowing all of the possible work in progress that may be using the latter to adjust. Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1a1433eb9b59..c99440aac1a1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -300,14 +300,6 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register( - const char *type, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay); - struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, struct thermal_trip *trips, @@ -364,15 +356,6 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else -static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) -{ return ERR_PTR(-ENODEV); } - static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, struct thermal_trip *trips, -- cgit v1.2.3 From e7716c74e3882405f9eca16faa6cb1bf19995399 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 21 Aug 2023 10:21:29 +0200 Subject: xarray: Document necessary flag in alloc functions Adds a new line to the docstrings of functions wrapping __xa_alloc() and __xa_alloc_cyclic(), informing about the necessity of flag XA_FLAGS_ALLOC being set previously. The documentation so far says that functions wrapping __xa_alloc() and __xa_alloc_cyclic() are supposed to return either -ENOMEM or -EBUSY in case of an error. If the xarray has been initialized without the flag XA_FLAGS_ALLOC, however, they fail with a different, undocumented error code. As hinted at in Documentation/core-api/xarray.rst, wrappers around these functions should only be invoked when the flag has been set. The functions' documentation should reflect that as well. Signed-off-by: Philipp Stanner Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 741703b45f61..cb571dfcf4b1 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the -- cgit v1.2.3 From 39285e124edbc752331e98ace37cc141a6a3747a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 5 Sep 2023 08:46:10 +0000 Subject: net: team: do not use dynamic lockdep key team interface has used a dynamic lockdep key to avoid false-positive lockdep deadlock detection. Virtual interfaces such as team usually have their own lock for protecting private data. These interfaces can be nested. team0 | team1 Each interface's lock is actually different(team0->lock and team1->lock). So, mutex_lock(&team0->lock); mutex_lock(&team1->lock); mutex_unlock(&team1->lock); mutex_unlock(&team0->lock); The above case is absolutely safe. But lockdep warns about deadlock. Because the lockdep understands these two locks are same. This is a false-positive lockdep warning. So, in order to avoid this problem, the team interfaces started to use dynamic lockdep key. The false-positive problem was fixed, but it introduced a new problem. When the new team virtual interface is created, it registers a dynamic lockdep key(creates dynamic lockdep key) and uses it. But there is the limitation of the number of lockdep keys. So, If so many team interfaces are created, it consumes all lockdep keys. Then, the lockdep stops to work and warns about it. In order to fix this problem, team interfaces use the subclass instead of the dynamic key. So, when a new team interface is created, it doesn't register(create) a new lockdep, but uses existed subclass key instead. It is already used by the bonding interface for a similar case. As the bonding interface does, the subclass variable is the same as the 'dev->nested_level'. This variable indicates the depth in the stacked interface graph. The 'dev->nested_level' is protected by RTNL and RCU. So, 'mutex_lock_nested()' for 'team->lock' requires RTNL or RCU. In the current code, 'team->lock' is usually acquired under RTNL, there is no problem with using 'dev->nested_level'. The 'team_nl_team_get()' and The 'lb_stats_refresh()' functions acquire 'team->lock' without RTNL. But these don't iterate their own ports nested so they don't need nested lock. Reproducer: for i in {0..1000} do ip link add team$i type team ip link add dummy$i master team$i type dummy ip link set dummy$i up ip link set team$i up done Splat looks like: BUG: MAX_LOCKDEP_ENTRIES too low! turning off the locking correctness validator. Please attach the output of /proc/lock_stat to the bug report CPU: 0 PID: 4104 Comm: ip Not tainted 6.5.0-rc7+ #45 Call Trace: dump_stack_lvl+0x64/0xb0 add_lock_to_list+0x30d/0x5e0 check_prev_add+0x73a/0x23a0 ... sock_def_readable+0xfe/0x4f0 netlink_broadcast+0x76b/0xac0 nlmsg_notify+0x69/0x1d0 dev_open+0xed/0x130 ... Reported-by: syzbot+9bbbacfbf1e04d5221f7@syzkaller.appspotmail.com Fixes: 369f61bee0f5 ("team: fix nested locking lockdep warning") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/if_team.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..12d4447fc8ab 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -221,10 +221,38 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; - struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; +static inline void __team_lock(struct team *team) +{ + mutex_lock(&team->lock); +} + +static inline int team_trylock(struct team *team) +{ + return mutex_trylock(&team->lock); +} + +#ifdef CONFIG_LOCKDEP +static inline void team_lock(struct team *team) +{ + ASSERT_RTNL(); + mutex_lock_nested(&team->lock, team->dev->nested_level); +} + +#else +static inline void team_lock(struct team *team) +{ + __team_lock(team); +} +#endif + +static inline void team_unlock(struct team *team) +{ + mutex_unlock(&team->lock); +} + static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { -- cgit v1.2.3 From 1a961e74d5abbea049588a3d74b759955b4ed9d5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Sep 2023 16:42:02 -0700 Subject: net: phylink: fix sphinx complaint about invalid literal sphinx complains about the use of "%PHYLINK_PCS_NEG_*": Documentation/networking/kapi:144: ./include/linux/phylink.h:601: WARNING: Inline literal start-string without end-string. Documentation/networking/kapi:144: ./include/linux/phylink.h:633: WARNING: Inline literal start-string without end-string. These are not valid symbols so drop the '%' prefix. Alternatively we could use %PHYLINK_PCS_NEG_\* (escape the *) or use normal literal ``PHYLINK_PCS_NEG_*`` but there is already a handful of un-adorned DEFINE_* in this file. Fixes: f99d471afa03 ("net: phylink: add PCS negotiation mode") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/all/20230626162908.2f149f98@canb.auug.org.au/ Signed-off-by: Jakub Kicinski Reviewed-by: Bagas Sanjaya Signed-off-by: David S. Miller --- include/linux/phylink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 7d07f8736431..2b886ea654bb 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -600,7 +600,7 @@ void pcs_get_state(struct phylink_pcs *pcs, * * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -630,7 +630,7 @@ void pcs_an_restart(struct phylink_pcs *pcs); * * The %mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); -- cgit v1.2.3 From 8f3f06dfd6873135068ccf1a0b386308e8c4da38 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 6 Sep 2023 22:53:55 +0800 Subject: raid6: Add LoongArch SIMD syndrome calculation The algorithms work on 64 bytes at a time, which is the L1 cache line size of all current and future LoongArch cores (that we care about), as confirmed by Huacai. The code is based on the generic int.uc algorithm, unrolled 4 times for LSX and 2 times for LASX. Further unrolling does not meaningfully improve the performance according to experiments. Performance numbers measured during system boot on a 3A5000 @ 2.5GHz: > raid6: lasx gen() 12726 MB/s > raid6: lsx gen() 10001 MB/s > raid6: int64x8 gen() 2876 MB/s > raid6: int64x4 gen() 3867 MB/s > raid6: int64x2 gen() 2531 MB/s > raid6: int64x1 gen() 1945 MB/s Comparison of xor() speeds (from different boots but meaningful anyway): > lasx: 11226 MB/s > lsx: 6395 MB/s > int64x4: 2147 MB/s Performance as measured by raid6test: > raid6: lasx gen() 25109 MB/s > raid6: lsx gen() 13233 MB/s > raid6: int64x8 gen() 4164 MB/s > raid6: int64x4 gen() 6005 MB/s > raid6: int64x2 gen() 5781 MB/s > raid6: int64x1 gen() 4119 MB/s > raid6: using algorithm lasx gen() 25109 MB/s > raid6: .... xor() 14439 MB/s, rmw enabled Acked-by: Song Liu Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- include/linux/raid/pq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index f29aaaf2eb21..874447485848 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); -- cgit v1.2.3 From f2091321044d9fbcadb93dfc1c9cf23e563ea40c Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 6 Sep 2023 22:53:55 +0800 Subject: raid6: Add LoongArch SIMD recovery implementation Similar to the syndrome calculation, the recovery algorithms also work on 64 bytes at a time to align with the L1 cache line size of current and future LoongArch cores (that we care about). Which means unrolled-by-4 LSX and unrolled-by-2 LASX code. The assembly is originally based on the x86 SSSE3/AVX2 ports, but register allocation has been redone to take advantage of LSX/LASX's 32 vector registers, and instruction sequence has been optimized to suit (e.g. LoongArch can perform per-byte srl and andi on vectors, but x86 cannot). Performance numbers measured by instrumenting the raid6test code, on a 3A5000 system clocked at 2.5GHz: > lasx 2data: 354.987 MiB/s > lasx datap: 350.430 MiB/s > lsx 2data: 340.026 MiB/s > lsx datap: 337.318 MiB/s > intx1 2data: 164.280 MiB/s > intx1 datap: 187.966 MiB/s Because recovery algorithms are chosen solely based on priority and availability, lasx is marked as priority 2 and lsx priority 1. At least for the current generation of LoongArch micro-architectures, LASX should always be faster than LSX whenever supported, and have similar power consumption characteristics (because the only known LASX-capable uarch, the LA464, always compute the full 256-bit result for vector ops). Acked-by: Song Liu Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- include/linux/raid/pq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 874447485848..006e18decfad 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -125,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; +extern const struct raid6_recov_calls raid6_recov_lsx; +extern const struct raid6_recov_calls raid6_recov_lasx; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; -- cgit v1.2.3 From 9b04c764af18a1dab6d48ca0671f70cdcccf90a2 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Wed, 6 Sep 2023 22:54:16 +0800 Subject: kasan: Add __HAVE_ARCH_SHADOW_MAP to support arch specific mapping MIPS, LoongArch and some other architectures have many holes between different segments and the valid address space (256T available) is insufficient to map all these segments to kasan shadow memory with the common formula provided by kasan core. So we need architecture specific mapping formulas to ensure different segments are mapped individually, and only limited space lengths of those specific segments are mapped to shadow. Therefore, when the incoming address is converted to a shadow, we need to add a condition to determine whether it is valid. Reviewed-by: Andrey Konovalov Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- include/linux/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..3df5499f7936 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); -- cgit v1.2.3 From 08c6d8bae48c2c28f7017d7b61b5d5a1518ceb39 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 5 Sep 2023 11:33:15 +0200 Subject: net: phy: Provide Module 4 KSZ9477 errata (DS80000754C) The KSZ9477 errata points out (in 'Module 4') the link up/down problems when EEE (Energy Efficient Ethernet) is enabled in the device to which the KSZ9477 tries to auto negotiate. The suggested workaround is to clear advertisement of EEE for PHYs in this chip driver. To avoid regressions with other switch ICs the new MICREL_NO_EEE flag has been introduced. Moreover, the in-register disablement of MMD_DEVICE_ID_EEE_ADV.MMD_EEE_ADV MMD register is removed, as this code is both; now executed too late (after previous rework of the PHY and DSA for KSZ switches) and not required as setting all members of eee_broken_modes bit field prevents the KSZ9477 from advertising EEE. Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ9477 Signed-off-by: Lukasz Majewski Tested-by: Oleksij Rempel # Confirmed disabled EEE with oscilloscope. Reviewed-by: Oleksij Rempel Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230905093315.784052-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 322d87255984..4e27ca7c49de 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,6 +44,7 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) +#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 6afcf0fb92701487421aa73c692855aa70fbc796 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Sep 2023 11:01:04 -0700 Subject: Revert "net: team: do not use dynamic lockdep key" This reverts commit 39285e124edbc752331e98ace37cc141a6a3747a. Looks like the change has unintended consequences in exposing objects before they are initialized. Let's drop this patch and try again in net-next. Reported-by: syzbot+44ae022028805f4600fc@syzkaller.appspotmail.com Fixes: 39285e124edb ("net: team: do not use dynamic lockdep key") Link: https://lore.kernel.org/all/20230907103124.6adb7256@kernel.org/ Signed-off-by: Jakub Kicinski --- include/linux/if_team.h | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 12d4447fc8ab..1b9b15a492fa 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -221,38 +221,10 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; + struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; -static inline void __team_lock(struct team *team) -{ - mutex_lock(&team->lock); -} - -static inline int team_trylock(struct team *team) -{ - return mutex_trylock(&team->lock); -} - -#ifdef CONFIG_LOCKDEP -static inline void team_lock(struct team *team) -{ - ASSERT_RTNL(); - mutex_lock_nested(&team->lock, team->dev->nested_level); -} - -#else -static inline void team_lock(struct team *team) -{ - __team_lock(team); -} -#endif - -static inline void team_unlock(struct team *team) -{ - mutex_unlock(&team->lock); -} - static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { -- cgit v1.2.3 From f94cf2206b066bd6d761d3347fd35f77b828c376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 7 Sep 2023 09:40:07 -0400 Subject: buffer: Make bh_offset() work for compound pages If the buffer pointed to by the buffer_head is part of a compound page, bh_offset() assumes that b_page is the precise page that contains the data. A recent change to jbd2 inadvertently violated that assumption. By using page_size(), we support both b_page being set to the head page (as page_size() will return the size of the entire folio) and the precise page (as it will return PAGE_SIZE for a tail page). Fixes: 8147c4c4546f ("jbd2: use a folio in jbd2_journal_write_metadata_buffer()") Reported-by: Zorro Lang Tested-by: Ritesh Harjani (IBM) Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..4ba242073adc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -173,7 +173,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) return test_bit_acquire(BH_Uptodate, &bh->b_state); } -#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) +static inline unsigned long bh_offset(const struct buffer_head *bh) +{ + return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); +} /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ -- cgit v1.2.3 From 41a5db8d8161457b121a03fde999ff6e00090ee2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:34 -0700 Subject: bpf: Add support for non-fix-size percpu mem allocation This is needed for later percpu mem allocation when the allocation is done by bpf program. For such cases, a global bpf_global_percpu_ma is added where a flexible allocation size is needed. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152734.1995725-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..440dd1f59a1c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -55,8 +55,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; -extern bool bpf_global_ma_set; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, -- cgit v1.2.3 From 55db92f42fe4a4ef7b4c2b4960c6212c8512dd53 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:39 -0700 Subject: bpf: Add BPF_KPTR_PERCPU as a field type BPF_KPTR_PERCPU represents a percpu field type like below struct val_t { ... fields ... }; struct t { ... struct val_t __percpu_kptr *percpu_data_ptr; ... }; where #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) While BPF_KPTR_REF points to a trusted kernel object or a trusted local object, BPF_KPTR_PERCPU points to a trusted local percpu object. This patch added basic support for BPF_KPTR_PERCPU related to percpu_kptr field parsing, recording and free operations. BPF_KPTR_PERCPU also supports the same map types as BPF_KPTR_REF does. Note that unlike a local kptr, it is possible that a BPF_KTPR_PERCPU struct may not contain any special fields like other kptr, bpf_spin_lock, bpf_list_head, etc. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152739.1996391-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 440dd1f59a1c..87eeb3a46a1d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -180,14 +180,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); -- cgit v1.2.3 From 01cc55af93884f1ff5a883426e1924378dfcc62a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:49 -0700 Subject: bpf: Add bpf_this_cpu_ptr/bpf_per_cpu_ptr support for allocated percpu obj The bpf helpers bpf_this_cpu_ptr() and bpf_per_cpu_ptr() are re-purposed for allocated percpu objects. For an allocated percpu obj, the reg type is 'PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU'. The return type for these two re-purposed helpera is 'PTR_TO_MEM | MEM_RCU | MEM_ALLOC'. The MEM_ALLOC allows that the per-cpu data can be read and written. Since the memory allocator bpf_mem_alloc() returns a ptr to a percpu ptr for percpu data, the first argument of bpf_this_cpu_ptr() and bpf_per_cpu_ptr() is patched with a dereference before passing to the helper func. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152749.1997202-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b6e58dab8e27..a3236651ec64 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -480,6 +480,7 @@ struct bpf_insn_aux_data { bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter__next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ -- cgit v1.2.3 From 6fdac58c560e4d164eb8161987bee045147cabe4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 7 Sep 2023 22:19:12 -0400 Subject: tracing: Remove unused trace_event_file dir field Now that eventfs structure is used to create the events directory via the eventfs dynamically allocate code, the "dir" field of the trace_event_file structure is no longer used. Remove it. Link: https://lkml.kernel.org/r/20230908022001.580400115@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index eb5c3add939b..12f875e9e69a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -650,7 +650,6 @@ struct trace_event_file { struct trace_event_call *event_call; struct event_filter __rcu *filter; struct eventfs_file *ef; - struct dentry *dir; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; -- cgit v1.2.3 From 5d153cd128251aaedc8e9657f0a949ec94952055 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 8 Sep 2023 16:34:59 -0500 Subject: spnego: add missing OID to oid registry Add missing OID to the registry. Some servers and clients (including Windows) now request "NEGOEX - SPNEGEO Extended Negotiation Security") See https://datatracker.ietf.org/doc/html/draft-zhu-negoex-02 Reviewed-by: Namjae Jeon Signed-off-by: Steve French --- include/linux/oid_registry.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 0f4a8903922a..f86a08ba0207 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -67,6 +67,7 @@ enum OID { OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ OID_ntlmssp, /* 1.3.6.1.4.1.311.2.2.10 */ + OID_negoex, /* 1.3.6.1.4.1.311.2.2.30 */ OID_spnego, /* 1.3.6.1.5.5.2 */ -- cgit v1.2.3 From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 4 Sep 2023 22:42:56 +0200 Subject: ata: libata: disallow dev-initiated LPM transitions to unsupported states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AHCI 1.3.1, the register description for CAP.SSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Slumber state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Slumber requests." In AHCI 1.3.1, the register description for CAP.PSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Partial state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Partial requests." Ensure that we always set the corresponding bits in PxSCTL.IPM, such that a device is not allowed to initiate transitions to power states which are unsupported by the HBA. DevSleep is always initiated by the HBA, however, for completeness, set the corresponding bit in PxSCTL.IPM such that agressive link power management cannot transition to DevSleep if DevSleep is not supported. sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. However, only libahci has the ability to read the CAP/CAP2 register to see if these features are supported. Therefore, in order to not introduce any regressions on ata_piix or libata-pmp, create flags that indicate that the respective feature is NOT supported. This way, the behavior for ata_piix and libata-pmp should remain unchanged. This change is based on a patch originally submitted by Runa Guo-oc. Signed-off-by: Niklas Cassel Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- include/linux/libata.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 52d58b13e5ee..bf4913f4d7ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -222,6 +222,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ -- cgit v1.2.3 From 2f4d3e293392571e02b106c8b431b638bd029276 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Sep 2023 16:40:32 +0300 Subject: gpio: pca953x: Drop unused fields in struct pca953x_platform_data New code should solely use firmware nodes for the specifics and not any callbacks. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/pca953x.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 96c1a14ab365..3c3787c4d96c 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -11,21 +11,8 @@ struct pca953x_platform_data { /* number of the first GPIO */ unsigned gpio_base; - /* initial polarity inversion setting */ - u32 invert; - /* interrupt base */ int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - const char *const *names; }; #endif /* _LINUX_PCA953X_H */ -- cgit v1.2.3 From cf8e8658100d4eae80ce9b21f7a81cb024dd5057 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 15:54:33 +0200 Subject: arch: Remove Itanium (IA-64) architecture The Itanium architecture is obsolete, and an informal survey [0] reveals that any residual use of Itanium hardware in production is mostly HP-UX or OpenVMS based. The use of Linux on Itanium appears to be limited to enthusiasts that occasionally boot a fresh Linux kernel to see whether things are still working as intended, and perhaps to churn out some distro packages that are rarely used in practice. None of the original companies behind Itanium still produce or support any hardware or software for the architecture, and it is listed as 'Orphaned' in the MAINTAINERS file, as apparently, none of the engineers that contributed on behalf of those companies (nor anyone else, for that matter) have been willing to support or maintain the architecture upstream or even be responsible for applying the odd fix. The Intel firmware team removed all IA-64 support from the Tianocore/EDK2 reference implementation of EFI in 2018. (Itanium is the original architecture for which EFI was developed, and the way Linux supports it deviates significantly from other architectures.) Some distros, such as Debian and Gentoo, still maintain [unofficial] ia64 ports, but many have dropped support years ago. While the argument is being made [1] that there is a 'for the common good' angle to being able to build and run existing projects such as the Grid Community Toolkit [2] on Itanium for interoperability testing, the fact remains that none of those projects are known to be deployed on Linux/ia64, and very few people actually have access to such a system in the first place. Even if there were ways imaginable in which Linux/ia64 could be put to good use today, what matters is whether anyone is actually doing that, and this does not appear to be the case. There are no emulators widely available, and so boot testing Itanium is generally infeasible for ordinary contributors. GCC still supports IA-64 but its compile farm [3] no longer has any IA-64 machines. GLIBC would like to get rid of IA-64 [4] too because it would permit some overdue code cleanups. In summary, the benefits to the ecosystem of having IA-64 be part of it are mostly theoretical, whereas the maintenance overhead of keeping it supported is real. So let's rip off the band aid, and remove the IA-64 arch code entirely. This follows the timeline proposed by the Debian/ia64 maintainer [5], which removes support in a controlled manner, leaving IA-64 in a known good state in the most recent LTS release. Other projects will follow once the kernel support is removed. [0] https://lore.kernel.org/all/CAMj1kXFCMh_578jniKpUtx_j8ByHnt=s7S+yQ+vGbKt9ud7+kQ@mail.gmail.com/ [1] https://lore.kernel.org/all/0075883c-7c51-00f5-2c2d-5119c1820410@web.de/ [2] https://gridcf.org/gct-docs/latest/index.html [3] https://cfarm.tetaneutral.net/machines/list/ [4] https://lore.kernel.org/all/87bkiilpc4.fsf@mid.deneb.enyo.de/ [5] https://lore.kernel.org/all/ff58a3e76e5102c94bb5946d99187b358def688a.camel@physik.fu-berlin.de/ Acked-by: Tony Luck Signed-off-by: Ard Biesheuvel --- include/linux/acpi.h | 9 +-------- include/linux/efi.h | 7 ------- include/linux/mm.h | 2 -- include/linux/moduleparam.h | 2 +- 4 files changed, 2 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..9bcf5641a7cf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,7 +259,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -1114,15 +1114,8 @@ static inline int acpi_get_lps0_constraint(struct device *dev) return ACPI_STATE_UNKNOWN; } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ -#ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else -static inline void arch_reserve_mem_area(acpi_physical_address addr, - size_t size) -{ -} -#endif /* CONFIG_X86 */ -#else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index 80b21d1c6eaf..9cc5bf32f6f2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -358,13 +358,10 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. */ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) -#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) -#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -851,10 +848,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } -#ifdef CONFIG_EFI_PCDP -extern int __init efi_setup_pcdp_console(char *); -#endif - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..17d530e12f70 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -362,8 +362,6 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_IA64) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 962cd41a2cb5..99e4f1f718c7 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const -- cgit v1.2.3 From f5e836884d8e55b416dfad55c29481ec1b65c1f0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jan 2023 17:57:47 +0100 Subject: kernel: Drop IA64 support from sig_fault handlers Signed-off-by: Ard Biesheuvel --- include/linux/sched/signal.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..155332977239 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -303,20 +303,11 @@ static inline void kernel_signal_stop(void) schedule(); } -#ifdef __ia64__ -# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 -#else -# define ___ARCH_SI_IA64(_a1, _a2, _a3) -#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr); +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); -- cgit v1.2.3 From b089ea3cc30de85ea7e20aa66500feb4082dfbf7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jan 2023 18:08:32 +0100 Subject: lib/raid6: Drop IA64 support Drop Itanium support from the RAID6 code, and along with it, the 16x and 32x unrolled versions, which were only used by IA64. Signed-off-by: Ard Biesheuvel --- include/linux/raid/pq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 006e18decfad..98030accf641 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; -extern const struct raid6_calls raid6_intx16; -extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; -- cgit v1.2.3 From f42dafe3da0cd887c9d2aaa59576f2a92ee4d876 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sun, 3 Sep 2023 21:06:57 +0200 Subject: gpiolib: unexport gpiod_set_transitory() There are no and never have been any users of gpiod_set_transitory() outside the core GPIOLIB code. Make it private. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/linux/gpio/consumer.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 1c4385a00f88..6cc345440a5b 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); -int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); @@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun return -ENOSYS; } -static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) -{ - /* GPIO can never have been requested */ - WARN_ON(desc); - return -ENOSYS; -} - static inline void gpiod_toggle_active_low(struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From 8de54392b849a612f337044d81d9859ee95ab871 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:35:20 +0200 Subject: gpiolib: remove stray newline in gpio/driver.h Fix a double newline in the GPIO provider header. Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4f0c5d62c8f3..1571cfca65e7 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -764,7 +764,6 @@ void gpiochip_free_own_desc(struct gpio_desc *desc); int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); - struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 37d42ab3924919652858f836a80ab49ec7d11f1e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:34:58 +0200 Subject: gpiolib: remove unnecessary extern specifiers from the driver header 'extern' doesn't do anything for function declarations. Remove it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1571cfca65e7..b721422f4bfa 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -529,8 +529,7 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned int offset); +const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned int offset); /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range @@ -549,9 +548,9 @@ extern const char *gpiochip_is_requested(struct gpio_chip *gc, for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -599,13 +598,13 @@ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *gc); -extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +void gpiochip_remove(struct gpio_chip *gc); +int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, + void *data, struct lock_class_key *lock_key, + struct lock_class_key *request_key); -extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_chip *gpiochip_find(void *data, + int (*match)(struct gpio_chip *gc, void *data)); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 2ae5c9248e06dac2c2360be26b4e25f673238337 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 31 Aug 2023 11:22:58 -0700 Subject: wifi: mac80211: Use flexible array in struct ieee80211_tim_ie Currently struct ieee80211_tim_ie defines: u8 virtual_map[1]; Per the guidance in [1] change this to be a flexible array. Per the discussion in [2] wrap the virtual_map in a union with a u8 item in order to preserve the existing expectation that the virtual_map must contain at least one octet (at least when used in a non-S1G PPDU). This means that no driver changes are required. [1] https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays [2] https://lore.kernel.org/linux-wireless/202308301529.AC90A9EF98@keescook/ Suggested-by: Kees Cook Signed-off-by: Jeff Johnson Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230831-ieee80211_tim_ie-v3-2-e10ff584ab5d@quicinc.com [add wifi prefix] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bd2f6e19c357..340d7e0f6bf7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -951,17 +951,24 @@ struct ieee80211_wide_bw_chansw_ie { * @dtim_count: DTIM Count * @dtim_period: DTIM Period * @bitmap_ctrl: Bitmap Control + * @required_octet: "Syntatic sugar" to force the struct size to the + * minimum valid size when carried in a non-S1G PPDU * @virtual_map: Partial Virtual Bitmap * * This structure represents the payload of the "TIM element" as - * described in IEEE Std 802.11-2020 section 9.4.2.5. + * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this + * definition is only applicable when the element is carried in a + * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap + * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; - /* variable size: 1 - 251 bytes */ - u8 virtual_map[1]; + union { + u8 required_octet; + DECLARE_FLEX_ARRAY(u8, virtual_map); + }; } __packed; /** -- cgit v1.2.3 From 0d423c4a78984dd02f6596d6fd9dd40446eec517 Mon Sep 17 00:00:00 2001 From: Alexey Romanov Date: Wed, 30 Aug 2023 17:08:50 +0300 Subject: drivers: meson: sm: correct meson_sm_* API retval handling 1. Following the ARM SMC32 calling convention, the return value from secure monitor is a 32-bit signed integer. This patch changes the type of the return value of the function meson_sm_call(). 2. Now, when meson_sm_call() returns a 32-bit signed integer, we need to ensure that this value is not negative. It is important to check that the return value is not negative in both the meson_sm_call_read() and meson_sm_call_write() functions. 3. Add a comment explaining why it is necessary to check if the SMC return value is equal to 0 in the function meson_sm_call_read(). It is not obvious when reading this code. Signed-off-by: Alexey Romanov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230830140850.17130-1-avromanov@salutedevices.com Signed-off-by: Neil Armstrong --- include/linux/firmware/meson/meson_sm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 95b0da2326a9..8eaf8922ab02 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -19,7 +19,7 @@ enum { struct meson_sm_firmware; int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); -- cgit v1.2.3 From 3e15dcf77b23b8e9b9b7f3c0d4def8fe9c12c534 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 8 Sep 2023 16:28:59 +0300 Subject: fs: rename __mnt_{want,drop}_write*() helpers Before exporting these helpers to modules, make their names more meaningful. The names mnt_{get,put)_write_access*() were chosen, because they rhyme with the inode {get,put)_write_access() helpers, which have a very close meaning for the inode object. Suggested-by: Christian Brauner Link: https://lore.kernel.org/r/20230817-anfechtbar-ruhelosigkeit-8c6cca8443fc@brauner/ Signed-off-by: Amir Goldstein Message-Id: <20230908132900.2983519-2-amir73il@gmail.com> Signed-off-by: Christian Brauner --- include/linux/mount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 4f40b40306d0..ac3dd2876197 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -92,8 +92,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -extern int __mnt_want_write(struct vfsmount *); -extern void __mnt_drop_write(struct vfsmount *); +int mnt_get_write_access(struct vfsmount *mnt); +void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); -- cgit v1.2.3 From fa671e4f1556e2c18e5443f777a75ae041290068 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 7 Sep 2023 10:52:03 +0200 Subject: fbdev/core: Unexport logo helpers The interfaces for the fbdev logo are not used outside of the fbdev module. Hence declare the fbdev logo functions in the internal header file and remove their symbol exports. Only build the functions if CONFIG_LOGO has been selected. Signed-off-by: Thomas Zimmermann Acked-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230907085408.9354-5-tzimmermann@suse.de --- include/linux/fb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 16c3e6d6c55d..d110676c9c83 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -591,8 +591,6 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* drivers/video/fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); -extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); -extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); @@ -603,9 +601,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var, extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); -extern bool fb_center_logo; -extern int fb_logo_count; - static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); -- cgit v1.2.3 From ebc7abb35b258152d4a424f89d7c03db1d7ce61c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Sep 2023 20:18:56 +0200 Subject: thermal: Constify the trip argument of the .get_trend() zone callback Add 'const' to the definition of the 'trip' argument of the .get_trend() thermal zone callback to indicate that the trip point passed to it should not be modified by it and adjust the callback functions implementing it, thermal_get_trend() in the ACPI thermal driver and __ti_thermal_get_trend(), accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Michal Wilczynski --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c99440aac1a1..a5ae4af955ff 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -80,8 +80,8 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *, - enum thermal_trend *); + int (*get_trend) (struct thermal_zone_device *, + const struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -- cgit v1.2.3 From ef7d9593390a050c50eba5fc02d2cb65a1104434 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:04 -0700 Subject: xfs: remove CPU hotplug infrastructure There are no users of the cpu hotplug hooks in xfs now, so remove it. This reverts f1653c2e2831e ("xfs: introduce CPU hotplug infrastructure"). Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 06dda85f0424..068f7738be22 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, - CPUHP_XFS_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, -- cgit v1.2.3 From 5b404fdabacf4bee92d8c66013402a85f18a6a10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Aug 2023 15:46:32 -0700 Subject: rcu: Add RCU CPU stall notifier It is sometimes helpful to have a way for the subsystem causing the stall to dump its state when an RCU CPU stall occurs. This commit therefore bases rcu_stall_chain_notifier_register() and rcu_stall_chain_notifier_unregister() on atomic notifiers in order to provide this functionality. Signed-off-by: Paul E. McKenney Cc: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/rcu_notifier.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/rcu_notifier.h (limited to 'include/linux') diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h new file mode 100644 index 000000000000..ebf371364581 --- /dev/null +++ b/include/linux/rcu_notifier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update notifiers, initially RCU CPU stall notifier. + * Separate from rcupdate.h to avoid #include loops. + * + * Copyright (C) 2023 Paul E. McKenney. + */ + +#ifndef __LINUX_RCU_NOTIFIER_H +#define __LINUX_RCU_NOTIFIER_H + +// Actions for RCU CPU stall notifier calls. +#define RCU_STALL_NOTIFY_NORM 1 +#define RCU_STALL_NOTIFY_EXP 2 + +#ifdef CONFIG_RCU_STALL_COMMON + +#include +#include + +int rcu_stall_chain_notifier_register(struct notifier_block *n); +int rcu_stall_chain_notifier_unregister(struct notifier_block *n); + +#else // #ifdef CONFIG_RCU_STALL_COMMON + +// No RCU CPU stall warnings in Tiny RCU. +static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } +static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } + +#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON + +#endif /* __LINUX_RCU_NOTIFIER_H */ -- cgit v1.2.3 From fc52a64416b010c8324e2cb50070faae868521c1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Sep 2023 16:39:29 -0400 Subject: tracing/synthetic: Fix order of struct trace_dynamic_info To make handling BIG and LITTLE endian better the offset/len of dynamic fields of the synthetic events was changed into a structure of: struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN u16 offset; u16 len; #else u16 len; u16 offset; #endif }; to replace the manual changes of: data_offset = offset & 0xffff; data_offest = len << 16; But if you look closely, the above is: << 16 | offset Which in little endian would be in memory: offset_lo offset_hi len_lo len_hi and in big endian: len_hi len_lo offset_hi offset_lo Which if broken into a structure would be: struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN u16 len; u16 offset; #else u16 offset; u16 len; #endif }; Which is the opposite of what was defined. Fix this and just to be safe also add "__packed". Link: https://lore.kernel.org/all/20230908154417.5172e343@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230908163929.2c25f3dc@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Tested-by: Sven Schnelle Acked-by: Masami Hiramatsu (Google) Fixes: ddeea494a16f3 ("tracing/synthetic: Use union instead of casts") Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 12f875e9e69a..21ae37e49319 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -62,13 +62,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* Used to find the offset and length of dynamic fields in trace events */ struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN - u16 offset; u16 len; + u16 offset; #else - u16 len; u16 offset; + u16 len; #endif -}; +} __packed; /* * The trace entry - the most basic unit of tracing. This is what -- cgit v1.2.3 From 49f776724e64c27dd861e7ac8da9d42f01d9d172 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:04 +0000 Subject: PCI/AER: Export pcie_aer_is_native() Export and move the declaration of pcie_aer_is_native() to a common header file to be reused by cxl/pci module. Signed-off-by: Smita Koralahalli Acked-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 2dd175f5debd..29cc10220952 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,11 +42,13 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif void cper_print_aer(struct pci_dev *dev, int aer_severity, -- cgit v1.2.3 From 2b69987be575b92adb6c177679f3c559134f0d8f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Oct 2019 15:03:50 -0400 Subject: sched: Add task_struct->faults_disabled_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There has been a long standing page cache coherence bug with direct IO. This provides part of a mechanism to fix it, currently just used by bcachefs but potentially worth promoting to the VFS. Direct IO evicts the range of the pagecache being read or written to. For reads, we need dirty pages to be written to disk, so that the read doesn't return stale data. For writes, we need to evict that range of the pagecache so that it's not stale after the write completes. However, without a locking mechanism to prevent those pages from being re-added to the pagecache - by a buffered read or page fault - page cache inconsistency is still possible. This isn't necessarily just an issue for userspace when they're playing games; filesystems may hang arbitrary state off the pagecache, and so page cache inconsistency may cause real filesystem bugs, depending on the filesystem. This is less of an issue for iomap based filesystems, but e.g. buffer heads caches disk block mappings (!) and attaches them to the pagecache, and bcachefs attaches disk reservations to pagecache pages. This issue has been hard to fix, because - we need to add a lock (henceforth called pagecache_add_lock), which would be held for the duration of the direct IO - page faults add pages to the page cache, thus need to take the same lock - dio -> gup -> page fault thus can deadlock And we cannot enforce a lock ordering with this lock, since userspace will be controlling the lock ordering (via the fd and buffer arguments to direct IOs), so we need a different method of deadlock avoidance. We need to tell the page fault handler that we're already holding a pagecache_add_lock, and since plumbing it through the entire gup() path would be highly impractical this adds a field to task_struct. Then the full method is: - in the dio path, when we first take the pagecache_add_lock, note the mapping in the current task_struct - in the page fault handler, if faults_disabled_mapping is set, we check if it's the same mapping as the one we're taking a page fault for, and if so return an error. Then we check lock ordering: if there's a lock ordering violation and trylock fails, we'll have to cycle the locks and return an error that tells the DIO path to retry: faults_disabled_mapping is also used for signalling "locks were dropped, please retry". Also relevant to this patch: mapping->invalidate_lock. mapping->invalidate_lock provides most of the required semantics - it's used by truncate/fallocate to block pages being added to the pagecache. However, since it's a rwsem, direct IOs would need to take the write side in order to block page cache adds, and would then be exclusive with each other - we'll need a new type of lock to pair with this approach. Signed-off-by: Kent Overstreet Cc: Jan Kara Cc: Darrick J. Wong Cc: linux-fsdevel@vger.kernel.org Cc: Andreas Grünbacher --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..d5951e99706a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct address_space *faults_disabled_mapping; int exit_state; int exit_code; -- cgit v1.2.3 From 771eb4fe8b420bb8563863e242861e635c742bc2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jul 2018 23:27:33 -0400 Subject: fs: factor out d_mark_tmpfile() New helper for bcachefs - bcachefs doesn't want the inode_dec_link_count() call that d_tmpfile does, it handles i_nlink on its own atomically with other btree updates Signed-off-by: Kent Overstreet Cc: Alexander Viro Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Christian Brauner --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..3da2f0545d5d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -251,6 +251,7 @@ extern struct dentry * d_make_root(struct inode *); /* - the ramfs-type tree */ extern void d_genocide(struct dentry *); +extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); -- cgit v1.2.3 From 83feeb195592b533541ff6399c8084e5b7c59677 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Apr 2022 15:26:28 -0400 Subject: lib/string_helpers: string_get_size() now returns characters wrote printbuf now needs to know the number of characters that would have been written if the buffer was too small, like snprintf(); this changes string_get_size() to return the the return value of snprintf(). Signed-off-by: Kent Overstreet --- include/linux/string_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 9d1f5bb74dd5..58fb1f90eda5 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -24,8 +24,8 @@ enum string_size_units { STRING_UNITS_2, /* use binary powers of 2^10 */ }; -void string_get_size(u64 size, u64 blk_size, enum string_size_units units, - char *buf, int len); +int string_get_size(u64 size, u64 blk_size, enum string_size_units units, + char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); -- cgit v1.2.3 From fe4fa2e4f7d0722c179fffa25911ea35cafadce2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:29:25 +0200 Subject: gpiolib: make gpiochip_get_desc() public It makes sense for a GPIO driver to want to get its own descriptor without requesting it. After all, the driver knows that it'll still be valid. Let's move this helper to linux/gpio/driver.h. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- include/linux/gpio/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b721422f4bfa..8f0859ba7065 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -757,6 +757,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -- cgit v1.2.3 From 2c97d3e55b70edf33b6e7f211bab8a748a0a2bcc Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Wed, 30 Aug 2023 15:22:37 +1200 Subject: platform/x86: asus-wmi: add support for ASUS screenpad Add support for the WMI methods used to turn off and adjust the brightness of the secondary "screenpad" device found on some high-end ASUS laptops like the GX650P series and others. There are some small quirks with this device when considering only the raw WMI methods: 1. The Off method can only switch the device off 2. Changing the brightness turns the device back on 3. To turn the device back on the brightness must be > 1 4. When the device is off the brightness can't be changed (so it is stored by the driver if device is off). 5. Booting with a value of 0 brightness (retained by bios) means the bios will set a value of >0 <15 6. When the device is off it is "unplugged" asus_wmi sets the minimum brightness as 20 in general use, and 60 for booting with values <= min. The ACPI methods are used in a new backlight device named asus_screenpad. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20230830032237.42987-2-luke@ljones.dev Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 16e99a1c37fc..63e630276499 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -58,6 +58,10 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +/* This can only be used to disable the screen, not re-enable */ +#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 +/* Writing a brightness re-enables the screen if disabled */ +#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 -- cgit v1.2.3 From 4eaf928622abc5dabc9b42a0de4dafbe29ddf491 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 11 Aug 2023 17:57:01 +0800 Subject: iio: Remove unused declarations Commit 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") declared but never implemented iio_sw_device_type_configfs_{un}register(). Commit b662f809d410 ("iio: core: Introduce IIO software triggers") declared but never implemented iio_sw_trigger_type_configfs_{un}register(). Commit a3e0b51884ee ("iio: accel: add support for FXLS8962AF/FXLS8964AF accelerometers") declared but never implemented fxls8962af_core_remove(). Commit 8dedcc3eee3a ("iio: core: centralize ioctl() calls to the main chardev") declared but never implemented iio_device_ioctl(). Commit d430f3c36ca6 ("iio: imu: inv_mpu6050: Use regmap instead of i2c specific functions") removed inv_mpu6050_write_reg() but not its declaration. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230811095701.35372-1-yuehaibing@huawei.com Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_device.h | 3 --- include/linux/iio/sw_trigger.h | 3 --- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index eff1e6b2595c..0f7fe7b522e3 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt); struct iio_sw_device *iio_sw_device_create(const char *, const char *); void iio_sw_device_destroy(struct iio_sw_device *); -int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt); -void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); - static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 47de2443e984..bc77f88df303 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt); struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *); void iio_sw_trigger_destroy(struct iio_sw_trigger *); -int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt); -void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); - static inline void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, -- cgit v1.2.3 From 373beef00f7d781a000b12c31fb17a5a9c25969c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 11 Sep 2023 15:52:57 +0100 Subject: KVM: arm64: nvhe: Ignore SVE hint in SMCCC function ID When SVE is enabled, the host may set bit 16 in SMCCC function IDs, a hint that indicates an unused SVE state. At the moment NVHE doesn't account for this bit when inspecting the function ID, and rejects most calls. Clear the hint bit before comparing function IDs. About version compatibility: the host's PSCI driver initially probes the firmware for a SMCCC version number. If the firmware implements a protocol recent enough (1.3), subsequent SMCCC calls have the hint bit set. Since the hint bit was reserved in earlier versions of the protocol, clearing it is fine regardless of the version in use. When a new hint is added to the protocol in the future, it will be added to ARM_SMCCC_CALL_HINTS and NVHE will handle it straight away. This patch only clears known hints and leaves reserved bits as is, because future SMCCC versions could use reserved bits as modifiers for the function ID, rather than hints. Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Reported-by: Ben Horgan Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230911145254.934414-4-jean-philippe@linaro.org --- include/linux/arm-smccc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7c67c17321d4..083f85653716 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -67,6 +67,8 @@ #define ARM_SMCCC_VERSION_1_3 0x10003 #define ARM_SMCCC_1_3_SVE_HINT 0x10000 +#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT + #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit v1.2.3 From 08700ec705043eb0cee01b35cf5b9d63f0230d12 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 6 Sep 2023 03:46:57 +0900 Subject: linux/export: fix reference to exported functions for parisc64 John David Anglin reported parisc has been broken since commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost"). Like ia64, parisc64 uses a function descriptor. The function references must be prefixed with P%. Also, symbols prefixed $$ from the library have the symbol type STT_LOPROC instead of STT_FUNC. They should be handled as functions too. Fixes: ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") Reported-by: John David Anglin Tested-by: John David Anglin Tested-by: Helge Deller Closes: https://lore.kernel.org/linux-parisc/1901598a-e11d-f7dd-a5d9-9a69d06e6b6e@bell.net/T/#u Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- include/linux/export-internal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 1c849db953a5..45fca09b2319 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -52,6 +52,8 @@ #ifdef CONFIG_IA64 #define KSYM_FUNC(name) @fptr(name) +#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name #endif -- cgit v1.2.3 From 25e73b7e3f72a25aa30cbb2eecb49036e0acf066 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2023 12:55:46 +0200 Subject: x86/ibt: Suppress spurious ENDBR It was reported that under certain circumstances GCC emits ENDBR instructions for _THIS_IP_ usage. Specifically, when it appears at the start of a basic block -- but not elsewhere. Since _THIS_IP_ is never used for control flow, these ENDBR instructions are completely superfluous. Override the _THIS_IP_ definition for x86_64 to avoid this. Less ENDBR instructions is better. Fixes: 156ff4a544ae ("x86/ibt: Base IBT bits") Reported-by: David Kaplan Reviewed-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230802110323.016197440@infradead.org --- include/linux/instruction_pointer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ -- cgit v1.2.3 From 133c4c0d37175f510a10fa9bed51e223936073fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Sep 2023 17:05:31 +0000 Subject: tcp: defer regular ACK while processing socket backlog This idea came after a particular workload requested the quickack attribute set on routes, and a performance drop was noticed for large bulk transfers. For high throughput flows, it is best to use one cpu running the user thread issuing socket system calls, and a separate cpu to process incoming packets from BH context. (With TSO/GRO, bottleneck is usually the 'user' cpu) Problem is the user thread can spend a lot of time while holding the socket lock, forcing BH handler to queue most of incoming packets in the socket backlog. Whenever the user thread releases the socket lock, it must first process all accumulated packets in the backlog, potentially adding latency spikes. Due to flood mitigation, having too many packets in the backlog increases chance of unexpected drops. Backlog processing unfortunately shifts a fair amount of cpu cycles from the BH cpu to the 'user' cpu, thus reducing max throughput. This patch takes advantage of the backlog processing, and the fact that ACK are mostly cumulative. The idea is to detect we are in the backlog processing and defer all eligible ACK into a single one, sent from tcp_release_cb(). This saves cpu cycles on both sides, and network resources. Performance of a single TCP flow on a 200Gbit NIC: - Throughput is increased by 20% (100Gbit -> 120Gbit). - Number of generated ACK per second shrinks from 240,000 to 40,000. - Number of backlog drops per second shrinks from 230 to 0. Benchmark context: - Regular netperf TCP_STREAM (no zerocopy) - Intel(R) Xeon(R) Platinum 8481C (Saphire Rapids) - MAX_SKB_FRAGS = 17 (~60KB per GRO packet) This feature is guarded by a new sysctl, and enabled by default: /proc/sys/net/ipv4/tcp_backlog_ack_defer Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Dave Taht Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..44d946161d4a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,15 +463,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) -- cgit v1.2.3 From 2b5dcb31a19a2e0acd869b12c9db9b2d696ef544 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 Sep 2023 23:04:41 +0800 Subject: bpf, x64: Fix tailcall infinite loop From commit ebf7d1f508a73871 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT"), the tailcall on x64 works better than before. From commit e411901c0b775a3a ("bpf: allow for tailcalls in BPF subprograms for x64 JIT"), tailcall is able to run in BPF subprograms on x64. From commit 5b92a28aae4dd0f8 ("bpf: Support attaching tracing BPF program to other BPF programs"), BPF program is able to trace other BPF programs. How about combining them all together? 1. FENTRY/FEXIT on a BPF subprogram. 2. A tailcall runs in the BPF subprogram. 3. The tailcall calls the subprogram's caller. As a result, a tailcall infinite loop comes up. And the loop would halt the machine. As we know, in tail call context, the tail_call_cnt propagates by stack and rax register between BPF subprograms. So do in trampolines. Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") Reviewed-by: Maciej Fijalkowski Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20230912150442.2009-3-hffilwlqm@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 87eeb3a46a1d..b9e573159432 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1035,6 +1035,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ -- cgit v1.2.3 From 8f012db27c9516be1a7aca93ea4a6ca9c75056c9 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 10 Jul 2023 13:02:58 -0700 Subject: x86/numa: Introduce numa_fill_memblks() numa_fill_memblks() fills in the gaps in numa_meminfo memblks over an physical address range. The ACPI driver will use numa_fill_memblks() to implement a new Linux policy that prescribes extending proximity domains in a portion of a CFMWS window to the entire window. Dan Williams offered this explanation of the policy: A CFWMS is an ACPI data structure that indicates *potential* locations where CXL memory can be placed. It is the playground where the CXL driver has free reign to establish regions. That space can be populated by BIOS created regions, or driver created regions, after hotplug or other reconfiguration. When BIOS creates a region in a CXL Window it additionally describes that subset of the Window range in the other typical ACPI tables SRAT, SLIT, and HMAT. The rationale for BIOS not pre-describing the entire CXL Window in SRAT, SLIT, and HMAT is that it can not predict the future. I.e. there is nothing stopping higher or lower performance devices being placed in the same Window. Compare that to ACPI memory hotplug that just onlines additional capacity in the proximity domain with little freedom for dynamic performance differentiation. That leaves the OS with a choice, should unpopulated window capacity match the proximity domain of an existing region, or should it allocate a new one? This patch takes the simple position of minimizing proximity domain proliferation by reusing any proximity domain intersection for the entire Window. If the Window has no intersections then allocate a new proximity domain. Note that SRAT, SLIT and HMAT information can be enumerated dynamically in a standard way from device provided data. Think of CXL as the end of ACPI needing to describe memory attributes, CXL offers a standard discovery model for performance attributes, but Linux still needs to interoperate with the old regime. Reported-by: Derick Marks Suggested-by: Dan Williams Signed-off-by: Alison Schofield Signed-off-by: Dave Hansen Reviewed-by: Dan Williams Tested-by: Derick Marks Link: https://lore.kernel.org/all/ef078a6f056ca974e5af85997013c0fda9e3326d.1689018477.git.alison.schofield%40intel.com --- include/linux/numa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..0f512c0aba54 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -12,6 +12,7 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) +#define NUMA_NO_MEMBLK (-1) /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO @@ -43,6 +44,12 @@ static inline int phys_to_target_node(u64 start) return 0; } #endif +#ifndef numa_fill_memblks +static inline int __init numa_fill_memblks(u64 start, u64 end) +{ + return NUMA_NO_MEMBLK; +} +#endif #else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { -- cgit v1.2.3 From 5eb1e6e459cfa025f79c43014f66ff62a55542f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 5 Sep 2023 21:42:53 +0200 Subject: i2c: Drop legacy callback .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all drivers are converted to the (new) .probe() callback, the temporary .probe_new() can go away. \o/ Link: https://lore.kernel.org/linux-i2c/20230626094548.559542-1-u.kleine-koenig@pengutronix.de Reviewed-by: Javier Martinez Canillas Reviewed-by: Jean Delvare Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3430cc2b05a6..0dae9db27538 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -237,7 +237,6 @@ enum i2c_driver_flags { * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,16 +271,8 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; - union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client); - /* - * Legacy callback that was part of a conversion of .probe(). - * Today it has the same semantic as .probe(). Don't use for new - * code. - */ - int (*probe_new)(struct i2c_client *client); - }; + int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); -- cgit v1.2.3 From bbaa6ffa5b6c9609d3b3c431c389b407eea5441f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 13 Sep 2023 11:32:33 +0800 Subject: power: supply: core: Use blocking_notifier_call_chain to avoid RCU complaint AMD PMF driver can cause the following warning: [ 196.159546] ------------[ cut here ]------------ [ 196.159556] Voluntary context switch within RCU read-side critical section! [ 196.159571] WARNING: CPU: 0 PID: 9 at kernel/rcu/tree_plugin.h:320 rcu_note_context_switch+0x43d/0x560 [ 196.159604] Modules linked in: nvme_fabrics ccm rfcomm snd_hda_scodec_cs35l41_spi cmac algif_hash algif_skcipher af_alg bnep joydev btusb btrtl uvcvideo btintel btbcm videobuf2_vmalloc intel_rapl_msr btmtk videobuf2_memops uvc videobuf2_v4l2 intel_rapl_common binfmt_misc hid_sensor_als snd_sof_amd_vangogh hid_sensor_trigger bluetooth industrialio_triggered_buffer videodev snd_sof_amd_rembrandt hid_sensor_iio_common amdgpu ecdh_generic kfifo_buf videobuf2_common hp_wmi kvm_amd sparse_keymap snd_sof_amd_renoir wmi_bmof industrialio ecc mc nls_iso8859_1 kvm snd_sof_amd_acp irqbypass snd_sof_xtensa_dsp crct10dif_pclmul crc32_pclmul mt7921e snd_sof_pci snd_ctl_led polyval_clmulni mt7921_common polyval_generic snd_sof ghash_clmulni_intel mt792x_lib mt76_connac_lib sha512_ssse3 snd_sof_utils aesni_intel snd_hda_codec_realtek crypto_simd mt76 snd_hda_codec_generic cryptd snd_soc_core snd_hda_codec_hdmi rapl ledtrig_audio input_leds snd_compress i2c_algo_bit drm_ttm_helper mac80211 snd_pci_ps hid_multitouch ttm drm_exec [ 196.159970] drm_suballoc_helper snd_rpl_pci_acp6x amdxcp drm_buddy snd_hda_intel snd_acp_pci snd_hda_scodec_cs35l41_i2c serio_raw gpu_sched snd_hda_scodec_cs35l41 snd_acp_legacy_common snd_intel_dspcfg snd_hda_cs_dsp_ctls snd_hda_codec libarc4 drm_display_helper snd_pci_acp6x cs_dsp snd_hwdep snd_soc_cs35l41_lib video k10temp snd_pci_acp5x thunderbolt snd_hda_core drm_kms_helper cfg80211 snd_seq snd_rn_pci_acp3x snd_pcm snd_acp_config cec snd_soc_acpi snd_seq_device rc_core ccp snd_pci_acp3x snd_timer snd soundcore wmi amd_pmf platform_profile amd_pmc mac_hid serial_multi_instantiate wireless_hotkey hid_sensor_hub sch_fq_codel msr parport_pc ppdev lp parport efi_pstore ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear dm_mirror dm_region_hash dm_log cdc_ether usbnet r8152 mii hid_generic nvme i2c_hid_acpi i2c_hid nvme_core i2c_piix4 xhci_pci amd_sfh drm xhci_pci_renesas nvme_common hid [ 196.160382] CPU: 0 PID: 9 Comm: kworker/0:1 Not tainted 6.6.0-rc1 #4 [ 196.160397] Hardware name: HP HP EliteBook 845 14 inch G10 Notebook PC/8B6E, BIOS V82 Ver. 01.02.00 08/24/2023 [ 196.160405] Workqueue: events power_supply_changed_work [ 196.160426] RIP: 0010:rcu_note_context_switch+0x43d/0x560 [ 196.160440] Code: 00 48 89 be 40 08 00 00 48 89 86 48 08 00 00 48 89 10 e9 63 fe ff ff 48 c7 c7 10 e7 b0 9e c6 05 e8 d8 20 02 01 e8 13 0f f3 ff <0f> 0b e9 27 fc ff ff a9 ff ff ff 7f 0f 84 cf fc ff ff 65 48 8b 3c [ 196.160450] RSP: 0018:ffffc900001878f0 EFLAGS: 00010046 [ 196.160462] RAX: 0000000000000000 RBX: ffff88885e834040 RCX: 0000000000000000 [ 196.160470] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 196.160476] RBP: ffffc90000187910 R08: 0000000000000000 R09: 0000000000000000 [ 196.160482] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 196.160488] R13: 0000000000000000 R14: ffff888100990000 R15: ffff888100990000 [ 196.160495] FS: 0000000000000000(0000) GS:ffff88885e800000(0000) knlGS:0000000000000000 [ 196.160504] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 196.160512] CR2: 000055cb053c8246 CR3: 000000013443a000 CR4: 0000000000750ef0 [ 196.160520] PKRU: 55555554 [ 196.160526] Call Trace: [ 196.160532] [ 196.160548] ? show_regs+0x72/0x90 [ 196.160570] ? rcu_note_context_switch+0x43d/0x560 [ 196.160580] ? __warn+0x8d/0x160 [ 196.160600] ? rcu_note_context_switch+0x43d/0x560 [ 196.160613] ? report_bug+0x1bb/0x1d0 [ 196.160637] ? handle_bug+0x46/0x90 [ 196.160658] ? exc_invalid_op+0x19/0x80 [ 196.160675] ? asm_exc_invalid_op+0x1b/0x20 [ 196.160709] ? rcu_note_context_switch+0x43d/0x560 [ 196.160727] __schedule+0xb9/0x15f0 [ 196.160746] ? srso_alias_return_thunk+0x5/0x7f [ 196.160765] ? srso_alias_return_thunk+0x5/0x7f [ 196.160778] ? acpi_ns_search_one_scope+0xbe/0x270 [ 196.160806] schedule+0x68/0x110 [ 196.160820] schedule_timeout+0x151/0x160 [ 196.160829] ? srso_alias_return_thunk+0x5/0x7f [ 196.160842] ? srso_alias_return_thunk+0x5/0x7f [ 196.160855] ? acpi_ns_lookup+0x3c5/0xa90 [ 196.160878] __down_common+0xff/0x220 [ 196.160905] __down_timeout+0x16/0x30 [ 196.160920] down_timeout+0x64/0x70 [ 196.160938] acpi_os_wait_semaphore+0x85/0x200 [ 196.160959] acpi_ut_acquire_mutex+0x9e/0x280 [ 196.160979] acpi_ex_enter_interpreter+0x2d/0xb0 [ 196.160992] acpi_ns_evaluate+0x2f0/0x5f0 [ 196.161005] acpi_evaluate_object+0x172/0x490 [ 196.161018] ? acpi_os_signal_semaphore+0x8a/0xd0 [ 196.161038] acpi_evaluate_integer+0x52/0xe0 [ 196.161055] ? kfree+0x79/0x120 [ 196.161071] ? srso_alias_return_thunk+0x5/0x7f [ 196.161089] acpi_ac_get_state.part.0+0x27/0x80 [ 196.161110] get_ac_property+0x5c/0x70 [ 196.161127] ? __pfx___power_supply_is_system_supplied+0x10/0x10 [ 196.161146] __power_supply_is_system_supplied+0x44/0xb0 [ 196.161166] class_for_each_device+0x124/0x160 [ 196.161184] ? acpi_ac_get_state.part.0+0x27/0x80 [ 196.161203] ? srso_alias_return_thunk+0x5/0x7f [ 196.161223] power_supply_is_system_supplied+0x3c/0x70 [ 196.161243] amd_pmf_get_power_source+0xe/0x20 [amd_pmf] [ 196.161276] amd_pmf_power_slider_update_event+0x49/0x90 [amd_pmf] [ 196.161310] amd_pmf_pwr_src_notify_call+0xe7/0x100 [amd_pmf] [ 196.161340] notifier_call_chain+0x5f/0xe0 [ 196.161362] atomic_notifier_call_chain+0x33/0x60 [ 196.161378] power_supply_changed_work+0x84/0x110 [ 196.161394] process_one_work+0x178/0x360 [ 196.161412] ? __pfx_worker_thread+0x10/0x10 [ 196.161424] worker_thread+0x307/0x430 [ 196.161440] ? __pfx_worker_thread+0x10/0x10 [ 196.161451] kthread+0xf4/0x130 [ 196.161467] ? __pfx_kthread+0x10/0x10 [ 196.161486] ret_from_fork+0x43/0x70 [ 196.161502] ? __pfx_kthread+0x10/0x10 [ 196.161518] ret_from_fork_asm+0x1b/0x30 [ 196.161558] [ 196.161562] ---[ end trace 0000000000000000 ]--- Since there's no guarantee that all the callbacks can work in atomic context, switch to use blocking_notifier_call_chain to relax the constraint. Signed-off-by: Kai-Heng Feng Reported-by: Allen Zhong Fixes: 4c71ae414474 ("platform/x86/amd/pmf: Add support SPS PMF feature") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217571 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20230913033233.602986-1-kai.heng.feng@canonical.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a427f13c757f..85b86768c0b9 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -767,7 +767,7 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct atomic_notifier_head power_supply_notifier; +extern struct blocking_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) -- cgit v1.2.3 From d090ec0df81e56556af3a2bf04a7e89347ae5784 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 31 Aug 2023 03:31:28 -0300 Subject: smp: Change function signatures to use call_single_data_t call_single_data_t is a size-aligned typedef of struct __call_single_data. This alignment is desirable in order to have smp_call_function*() avoid bouncing an extra cacheline in case of an unaligned csd, given this would hurt performance. Since the removal of struct request->csd in commit 660e802c76c8 ("blk-mq: use percpu csd to remote complete instead of per-rq csd") there are no current users of smp_call_function*() with unaligned csd. Change every 'struct __call_single_data' function parameter to 'call_single_data_t', so we have warnings if any new code tries to introduce an smp_call_function*() call with unaligned csd. Signed-off-by: Leonardo Bras Reviewed-by: Guo Ren Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230831063129.335425-1-leobras@redhat.com --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca..e87520dc2959 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* * Cpus stopping functions in panic. All have default weak definitions. -- cgit v1.2.3 From 85be6d842447067ce76047a14d4258c96fd33b7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Aug 2023 12:52:04 +0200 Subject: cleanup: Make no_free_ptr() __must_check recent discussion brought about the realization that it makes sense for no_free_ptr() to have __must_check semantics in order to avoid leaking the resource. Additionally, add a few comments to clarify why/how things work. All credit to Linus on how to combine __must_check and the stmt-expression. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20230816103102.GF980931@hirez.programming.kicks-ass.net --- include/linux/cleanup.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..9f1a9c455b68 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) -- cgit v1.2.3 From 6fb45460615358157a6d3c990e74f9c1395247e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 20:45:16 +0200 Subject: sched: Simplify tg_set_cfs_bandwidth() Use guards to reduce gotos and simplify control flow. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..f19f56501809 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -153,6 +153,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); -- cgit v1.2.3 From 89de9921dfa77e43b985bde99a6031ab66511020 Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 6 Sep 2023 13:57:01 -0600 Subject: virtchnl: Add CRC stripping capability Some VFs may want to disable CRC stripping on incoming packets so create an offload for that. The VF already sends information about configuring its RX queues so use that structure to indicate that the CRC stripping should be enabled or not. Signed-off-by: Paul M Stillwell Jr Reviewed-by: Jesse Brandeburg Reviewed-by: Paul Menzel Signed-off-by: Ahmed Zaki Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index d0807ad43f93..dd71d3009771 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -240,6 +240,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -295,7 +296,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. */ /* Rx queue config info */ @@ -307,7 +314,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; -- cgit v1.2.3 From b193a78ddb5ee7dba074d3f28dc050069ba083c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:40 -0400 Subject: NFS: Use the correct commit info in nfs_join_page_group() Ensure that nfs_clear_request_commit() updates the correct counters when it removes them from the commit list. Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From 806a3bc421a115fbb287c1efce63a48c54ee804b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 30 Aug 2023 15:29:34 -0400 Subject: NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..cd628c4b011e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From e4c89f9380017b6b2e63836e2de1af8eb4535384 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 27 Aug 2023 23:14:04 +0200 Subject: lib/ucs2_string: Add UCS-2 strscpy function Add a ucs2_strscpy() function for UCS-2 strings. The behavior is equivalent to the standard strscpy() function, just for 16-bit character UCS-2 strings. Signed-off-by: Maximilian Luz Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230827211408.689076-2-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- include/linux/ucs2_string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cf3ada3e820e..c499ae809c7d 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -10,6 +10,7 @@ typedef u16 ucs2_char_t; unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); unsigned long ucs2_utf8size(const ucs2_char_t *src); -- cgit v1.2.3 From 00b1248606ba3979ccae30ed11df8cdc1a84245a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 27 Aug 2023 23:14:05 +0200 Subject: firmware: qcom_scm: Add support for Qualcomm Secure Execution Environment SCM interface Add support for SCM calls to Secure OS and the Secure Execution Environment (SEE) residing in the TrustZone (TZ) via the QSEECOM interface. This allows communication with Secure/TZ applications, for example 'uefisecapp' managing access to UEFI variables. For better separation, make qcom_scm spin up a dedicated child (platform) device in case QSEECOM support has been detected. The corresponding driver for this device is then responsible for managing any QSEECOM clients. Specifically, this driver attempts to automatically detect known and supported applications, creating a client (auxiliary) device for each one. The respective client/auxiliary driver is then responsible for managing and communicating with the application. While this patch introduces only a very basic interface without the more advanced features (such as re-entrant and blocking SCM calls and listeners/callbacks), this is enough to talk to the aforementioned 'uefisecapp'. Signed-off-by: Maximilian Luz Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20230827211408.689076-3-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_qseecom.h | 46 ++++++++++++++++++++++++++++++ include/linux/firmware/qcom/qcom_scm.h | 22 ++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 include/linux/firmware/qcom/qcom_qseecom.h (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h new file mode 100644 index 000000000000..b531547e1dc9 --- /dev/null +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. + * + * Copyright (C) 2023 Maximilian Luz + */ +#include +#include + +#include + +/** + * struct qseecom_client - QSEECOM client device. + * @aux_dev: Underlying auxiliary device. + * @app_id: ID of the loaded application. + */ +struct qseecom_client { + struct auxiliary_device aux_dev; + u32 app_id; +}; + +/** + * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @client: The QSEECOM client associated with the target app. + * @req: Request buffer sent to the app (must be DMA-mappable). + * @req_size: Size of the request buffer. + * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given client and read + * back its response. The caller must provide two DMA memory regions, one for + * the request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send(). + * Clients should prefer to use this wrapper. + * + * Return: Zero on success, nonzero on failure. + */ +static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, + void *rsp, size_t rsp_size) +{ + return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); +} diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 0c091a3f6d49..e1ea2eb56d04 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -122,4 +122,26 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); +#ifdef CONFIG_QCOM_QSEECOM + +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); +int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, + size_t rsp_size); + +#else /* CONFIG_QCOM_QSEECOM */ + +static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + return -EINVAL; +} + +static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, + size_t req_size, void *rsp, + size_t rsp_size) +{ + return -EINVAL; +} + +#endif /* CONFIG_QCOM_QSEECOM */ + #endif -- cgit v1.2.3 From b93c5fe16e4aa177cd072c1c4652cbe1b19a7812 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 3 Aug 2023 21:49:19 +0800 Subject: rcu: Remove unused function declaration rcu_eqs_special_set() Commit a86baa69c2b7 ("rcu: Remove special bit at the bottom of the ->dynticks counter") left behind this, remove it. Signed-off-by: Yue Haibing Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/rcutree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 126f6b418f6a..153cfc7bbffd 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,6 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); -- cgit v1.2.3 From 6e284c55fc0bef7d25fd34d29db11f483da60ea4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 5 Aug 2023 11:17:25 +0800 Subject: mm: Remove kmem_valid_obj() Function kmem_dump_obj() will splat if passed a pointer to a non-slab object. So nothing calls it directly, instead calling kmem_valid_obj() first to determine whether the passed pointer to a valid slab object. This means that merging kmem_valid_obj() into kmem_dump_obj() will make the code more concise. Therefore, convert kmem_dump_obj() to work the same way as vmalloc_dump_obj(), removing the need for the kmem_dump_obj() caller to check kmem_valid_obj(). After this, there are no remaining calls to kmem_valid_obj() anymore, and it can be safely removed. Suggested-by: Matthew Wilcox Signed-off-by: Zhen Lei Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/slab.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 8228d1276a2f..ff56ab804bf6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -245,8 +245,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) size_t ksize(const void *objp); #ifdef CONFIG_PRINTK -bool kmem_valid_obj(void *object); -void kmem_dump_obj(void *object); +bool kmem_dump_obj(void *object); +#else +static inline bool kmem_dump_obj(void *object) { return false; } #endif /* -- cgit v1.2.3 From 25cc71d1527b55c880d333e7cc1dc37aeef9843f Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 11:44:41 +0500 Subject: lsm: constify 'sb' parameter in security_quotactl() SELinux registers the implementation for the "quotactl" hook. Looking at the function implementation we observe that the parameter "sb" is not changing. Mark the "sb" parameter of LSM hook security_quotactl() as "const" since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ac962c4cb44b..b464f9c1894f 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -43,7 +43,7 @@ LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) -LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, diff --git a/include/linux/security.h b/include/linux/security.h index 5f16eecde00b..1a02e67e682f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -284,7 +284,7 @@ int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts); -int security_quotactl(int cmds, int type, int id, struct super_block *sb); +int security_quotactl(int cmds, int type, int id, const struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); @@ -581,7 +581,7 @@ static inline int security_capable(const struct cred *cred, } static inline int security_quotactl(int cmds, int type, int id, - struct super_block *sb) + const struct super_block *sb) { return 0; } -- cgit v1.2.3 From 4a00c673068e72c12d243f5c31000246d6984e44 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 12:17:29 +0500 Subject: lsm: constify 'file' parameter in security_bprm_creds_from_file() The 'bprm_creds_from_file' hook has implementation registered in commoncap. Looking at the function implementation we observe that the 'file' parameter is not changing. Mark the 'file' parameter of LSM hook security_bprm_creds_from_file() as 'const' since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/fs.h | 2 +- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..f69d085e531f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2450,7 +2450,7 @@ struct filename { }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index b464f9c1894f..5dfe67d69aba 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -50,7 +50,7 @@ LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) -LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) +LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) diff --git a/include/linux/security.h b/include/linux/security.h index 1a02e67e682f..edbea3e0a13f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -151,7 +151,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int cap_inode_removexattr(struct mnt_idmap *idmap, @@ -290,7 +290,7 @@ int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); @@ -613,7 +613,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) } static inline int security_bprm_creds_from_file(struct linux_binprm *bprm, - struct file *file) + const struct file *file) { return cap_bprm_creds_from_file(bprm, file); } -- cgit v1.2.3 From 64fc9526147c7fc14535134d8ea79b9c8dc549a7 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 12:47:56 +0500 Subject: lsm: constify 'bprm' parameter in security_bprm_committing_creds() The 'bprm_committing_creds' hook has implementations registered in SELinux and Apparmor. Looking at the function implementations we observe that the 'bprm' parameter is not changing. Mark the 'bprm' parameter of LSM hook security_bprm_committing_creds() as 'const' since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 5dfe67d69aba..f6acc3ed66a3 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -52,7 +52,7 @@ LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, diff --git a/include/linux/security.h b/include/linux/security.h index edbea3e0a13f..885053f81019 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -292,7 +292,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); -void security_bprm_committing_creds(struct linux_binprm *bprm); +void security_bprm_committing_creds(const struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); @@ -623,7 +623,7 @@ static inline int security_bprm_check(struct linux_binprm *bprm) return 0; } -static inline void security_bprm_committing_creds(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(const struct linux_binprm *bprm) { } -- cgit v1.2.3 From 81b36803ac139827538ac5ce4028e750a3c53f53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:21 +0000 Subject: udp: introduce udp->udp_flags According to syzbot, it is time to use proper atomic flags for various UDP flags. Add udp_flags field, and convert udp->corkflag to first bit in it. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..23f0693e0d9c 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,14 +32,20 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ @@ -51,6 +57,11 @@ struct udp_sock { gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; +/* indicator bits used by pcflag: */ +#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ +#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ + __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +73,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,6 +100,15 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) -- cgit v1.2.3 From a0002127cd746fcaa182ad3386ef6931c37f3bda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:22 +0000 Subject: udp: move udp->no_check6_tx to udp->udp_flags syzbot reported that udp->no_check6_tx can be read locklessly. Use one atomic bit from udp->udp_flags Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 23f0693e0d9c..e3f2a6c7ac1d 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,6 +34,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ }; struct udp_sock { @@ -47,8 +48,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set @@ -115,7 +115,7 @@ struct udp_sock { static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) @@ -123,9 +123,9 @@ static inline void udp_set_no_check6_rx(struct sock *sk, bool val) udp_sk(sk)->no_check6_rx = val; } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } static inline bool udp_get_no_check6_rx(struct sock *sk) -- cgit v1.2.3 From bcbc1b1de884647aa0318bf74eb7f293d72a1e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:23 +0000 Subject: udp: move udp->no_check6_rx to udp->udp_flags syzbot reported that udp->no_check6_rx can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index e3f2a6c7ac1d..8d4c3835b1b2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -35,6 +35,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ }; struct udp_sock { @@ -48,8 +49,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this @@ -120,7 +120,7 @@ static inline void udp_set_no_check6_tx(struct sock *sk, bool val) static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } static inline bool udp_get_no_check6_tx(const struct sock *sk) @@ -128,9 +128,9 @@ static inline bool udp_get_no_check6_tx(const struct sock *sk) return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, -- cgit v1.2.3 From e1dc0615c6b08ef36414f08c011965b8fb56198b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:24 +0000 Subject: udp: move udp->gro_enabled to udp->udp_flags syzbot reported that udp->gro_enabled can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 8d4c3835b1b2..b344bd2e41fc 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -36,6 +36,7 @@ enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ }; struct udp_sock { @@ -54,7 +55,6 @@ struct udp_sock { * different encapsulation layer set * this */ - gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; /* indicator bits used by pcflag: */ -- cgit v1.2.3 From f5f52f0884a595ff99ab1a608643fe4025fca2d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:26 +0000 Subject: udp: move udp->accept_udp_{l4|fraglist} to udp->udp_flags These are read locklessly, move them to udp_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index b344bd2e41fc..bb2b87adfbea 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -37,6 +37,8 @@ enum { UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, }; struct udp_sock { @@ -50,13 +52,11 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1; /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ - accept_udp_l4:1, - accept_udp_fraglist:1; /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -149,10 +149,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -160,8 +162,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ -- cgit v1.2.3 From ac9a7f4ce5dda1472e8f44096f33066c6ec1a3b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:27 +0000 Subject: udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO Move udp->encap_enabled to udp->udp_flags. Add udp_test_and_set_bit() helper to allow lockless udp_tunnel_encap_enable() implementation. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index bb2b87adfbea..0cf83270a4a2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -39,6 +39,7 @@ enum { UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ }; struct udp_sock { @@ -52,11 +53,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char encap_enabled:1; /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ + /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -104,6 +101,8 @@ struct udp_sock { test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_set_bit(nr, sk) \ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_clear_bit(nr, sk) \ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_assign_bit(nr, sk, val) \ -- cgit v1.2.3 From 729549aa350c56a777bb342941ed4d69b6585769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:29 +0000 Subject: udplite: remove UDPLITE_BIT This flag is set but never read, we can remove it. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 0cf83270a4a2..58156edec009 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,9 +55,8 @@ struct udp_sock { __u8 encap_type; /* Is this an Encapsulation socket? */ /* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ +#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header -- cgit v1.2.3 From 882af43a0fc37e26d85fb0df0c9edd3bed928de4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:30 +0000 Subject: udplite: fix various data-races udp->pcflag, udp->pcslen and udp->pcrlen reads/writes are racy. Move udp->pcflag to udp->udp_flags for atomicity, and add READ_ONCE()/WRITE_ONCE() annotations for pcslen and pcrlen. Fixes: ba4e58eca8aa ("[NET]: Supporting UDP-Lite (RFC 3828) in Linux") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 58156edec009..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -40,6 +40,8 @@ enum { UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; struct udp_sock { @@ -54,10 +56,6 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ -/* indicator bits used by pcflag: */ -#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. -- cgit v1.2.3 From 2758ac3a11d78af56e6969af04dec611806a62de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 Sep 2023 21:28:25 +0200 Subject: firmware: qcom-scm: drop unneeded 'extern' specifiers The 'extern' specifier in front of a function declaration has no effect. Remove all of them from the qcom-scm header. Signed-off-by: Bartosz Golaszewski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230913192826.36187-1-bartosz.golaszewski@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 101 +++++++++++++++------------------ 1 file changed, 47 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index e1ea2eb56d04..ccaf28846054 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -extern bool qcom_scm_is_available(void); +bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); +int qcom_scm_set_cold_boot_addr(void *entry); +int qcom_scm_set_warm_boot_addr(void *entry); +void qcom_scm_cpu_power_down(u32 flags); +int qcom_scm_set_remote_state(u32 state, u32 id); struct qcom_scm_pas_metadata { void *ptr; @@ -72,55 +72,48 @@ struct qcom_scm_pas_metadata { ssize_t size; }; -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); -extern void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - u64 *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); +int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); +int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 peripheral); +int qcom_scm_pas_shutdown(u32 peripheral); +bool qcom_scm_pas_supported(u32 peripheral); + +int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +bool qcom_scm_restore_sec_cfg_available(void); +int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, u32 cp_nonpixel_size); +int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +bool qcom_scm_ocmem_lock_available(void); +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode); +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size); + +bool qcom_scm_ice_available(void); +int qcom_scm_ice_invalidate_key(u32 index); +int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, u32 data_unit_size); + +bool qcom_scm_hdcp_available(void); +int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); + +int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +int qcom_scm_lmh_profile_change(u32 profile_id); +bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM -- cgit v1.2.3 From a721f7b8c3548e943e514a957f2a37f4763b9888 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 13:16:40 +0500 Subject: lsm: constify 'bprm' parameter in security_bprm_committed_creds() Three LSMs register the implementations for the 'bprm_committed_creds()' hook: AppArmor, SELinux and tomoyo. Looking at the function implementations we may observe that the 'bprm' parameter is not changing. Mark the 'bprm' parameter of LSM hook security_bprm_committed_creds() as 'const' since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran [PM: minor merge fuzzing due to other constification patches] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f6acc3ed66a3..3b0f5cfca464 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -53,7 +53,7 @@ LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) diff --git a/include/linux/security.h b/include/linux/security.h index 885053f81019..3148103123fb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -293,7 +293,7 @@ int security_bprm_creds_for_exec(struct linux_binprm *bprm); int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(const struct linux_binprm *bprm); -void security_bprm_committed_creds(struct linux_binprm *bprm); +void security_bprm_committed_creds(const struct linux_binprm *bprm); int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); @@ -627,7 +627,7 @@ static inline void security_bprm_committing_creds(const struct linux_binprm *bpr { } -static inline void security_bprm_committed_creds(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(const struct linux_binprm *bprm) { } -- cgit v1.2.3 From 20a2aa47097aae7016209c4dbe392b3b25e0d883 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 14:01:28 +0500 Subject: lsm: constify 'sb' parameter in security_sb_kern_mount() The "sb_kern_mount" hook has implementation registered in SELinux. Looking at the function implementation we observe that the "sb" parameter is not changing. Mark the "sb" parameter of LSM hook security_sb_kern_mount() as "const" since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran [PM: minor merge fuzzing due to other constification patches] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 3b0f5cfca464..99b8176c3738 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -66,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) -LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, diff --git a/include/linux/security.h b/include/linux/security.h index 3148103123fb..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -304,7 +304,7 @@ void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); -int security_sb_kern_mount(struct super_block *sb); +int security_sb_kern_mount(const struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, -- cgit v1.2.3 From 4de7b17fd05d03fa919e8c47fc66122bd24d7b6c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Aug 2023 14:44:28 +0100 Subject: sched: Assert for_each_thread() is properly locked list_for_each_entry_rcu() takes an optional fourth argument which allows RCU to assert that the correct lock is held. Several callers of for_each_thread() rely on their caller to be holding the appropriate lock, so this is a useful assertion to include. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Ingo Molnar Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20230821134428.2504912-1-willy@infradead.org --- include/linux/sched/signal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..9610bad018a3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -656,7 +656,8 @@ extern bool current_is_single_threaded(void); while ((t = next_thread(t)) != g) #define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ + lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) -- cgit v1.2.3 From b0adfba7ee770fef20b1b6d86706c28f7fccfb07 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:01:59 +0000 Subject: ipv6: lockless IPV6_UNICAST_HOPS implementation Some np->hop_limit accesses are racy, when socket lock is not held. Add missing annotations and switch to full lockless implementation. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index af8a771a053c..c2e087071384 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -213,17 +213,7 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ -- cgit v1.2.3 From d986f52124e062753e33b6fe303be5904a997eac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:00 +0000 Subject: ipv6: lockless IPV6_MULTICAST_LOOP implementation Add inet6_{test|set|clear|assign}_bit() helpers. Note that I am using bits from inet->inet_flags, this might change in the future if we need more flags. While solving data-races accessing np->mc_loop, this patch also allows to implement lockless accesses to np->mcast_hops in the following patch. Also constify sk_mc_loop() argument. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c2e087071384..68cf1ca94914 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -218,11 +218,9 @@ struct ipv6_pinfo { #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; + __u16 __unused_2:7, #else - __u16 mc_loop:1, - __unused_2:6; + __u16 __unused_2:7; __s16 mcast_hops:9; #endif int ucast_oif; @@ -283,6 +281,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ -- cgit v1.2.3 From 2da23eb07c91241d962f3ff05565065484cd8929 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:01 +0000 Subject: ipv6: lockless IPV6_MULTICAST_HOPS implementation This fixes data-races around np->mcast_hops, and make IPV6_MULTICAST_HOPS lockless. Note that np->mcast_hops is never negative, thus can fit an u8 field instead of s16. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68cf1ca94914..9cc278b5e4f4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,15 +214,8 @@ struct ipv6_pinfo { __u32 frag_size; s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:7, -#else - __u16 __unused_2:7; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; -- cgit v1.2.3 From dcae74622c051b219ee628669a31716473efda2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:04 +0000 Subject: ipv6: lockless IPV6_RECVERR_RFC4884 implementation Move np->recverr_rfc4884 to an atomic flag to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9cc278b5e4f4..0d2b0a1b2dae 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,7 +256,6 @@ struct ipv6_pinfo { autoflowlabel:1, autoflowlabel_set:1, mc_all:1, - recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; -- cgit v1.2.3 From 6559c0ff3bc27d7e4d447d31c1d7e8eae0e959f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:05 +0000 Subject: ipv6: lockless IPV6_MULTICAST_ALL implementation Move np->mc_all to an atomic flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0d2b0a1b2dae..d88e91b7f0a3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -255,7 +255,6 @@ struct ipv6_pinfo { dontfrag:1, autoflowlabel:1, autoflowlabel_set:1, - mc_all:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; -- cgit v1.2.3 From 5121516b0c4736b7977d977b239e36d23ec64401 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:06 +0000 Subject: ipv6: lockless IPV6_AUTOFLOWLABEL implementation Move np->autoflowlabel and np->autoflowlabel_set in inet->inet_flags, to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d88e91b7f0a3..e3be5dc21b7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -253,8 +253,6 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; -- cgit v1.2.3 From 1086ca7cce292bb498d7f8f85f4593c9ef4902b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:07 +0000 Subject: ipv6: lockless IPV6_DONTFRAG implementation Move np->dontfrag flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e3be5dc21b7d..57d563f1d4b1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -252,7 +252,6 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; -- cgit v1.2.3 From 3fa29971c69519629370b119b0b618ee88ade6b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:08 +0000 Subject: ipv6: lockless IPV6_RECVERR implemetation np->recverr is moved to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 57d563f1d4b1..53f4f1b97a78 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, + __u16 sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ -- cgit v1.2.3 From 3cccda8db2cf2f2a224d55d5b6e2251d478c58ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:09 +0000 Subject: ipv6: move np->repflow to atomic flags Move np->repflow to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 53f4f1b97a78..e62413371ea4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -244,7 +244,6 @@ struct ipv6_pinfo { /* sockopt flags */ __u16 sndflow:1, - repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address -- cgit v1.2.3 From 83cd5eb654b320c1972254f243531f3f3cebcccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:10 +0000 Subject: ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation Reads from np->rtalert_isolate are racy. Move this flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e62413371ea4..f288a35f157f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -246,11 +246,10 @@ struct ipv6_pinfo { __u16 sndflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; -- cgit v1.2.3 From 6b724bc4300b431443f3b99520994a5aece347cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:11 +0000 Subject: ipv6: lockless IPV6_MTU_DISCOVER implementation Most np->pmtudisc reads are racy. Move this 3bit field on a full byte, add annotations and make IPV6_MTU_DISCOVER setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f288a35f157f..10f521a6a9c8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,13 +243,12 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 sndflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ + __u8 sndflow:1, srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; -- cgit v1.2.3 From 859f8b265fc2a11af0fb0c52b4087e0409250592 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:12 +0000 Subject: ipv6: lockless IPV6_FLOWINFO_SEND implementation np->sndflow reads are racy. Use one bit ftom atomic inet->inet_flags instead, IPV6_FLOWINFO_SEND setsockopt() can be lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 10f521a6a9c8..09253825c99c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 sndflow:1, - srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ -- cgit v1.2.3 From 0a596b0682a7ce37e26c36629816f105c6459d06 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2023 16:36:41 +0800 Subject: KEYS: Include linux/errno.h in linux/verification.h Add inclusion of linux/errno.h as otherwise the reference to EINVAL may be invalid. Fixes: f3cf4134c5c6 ("bpf: Add bpf_lookup_*_key() and bpf_key_put() kfuncs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308261414.HKw1Mrip-lkp@intel.com/ Signed-off-by: Herbert Xu --- include/linux/verification.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h index f34e50ebcf60..cb2d47f28091 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -8,6 +8,7 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +#include #include /* -- cgit v1.2.3 From b58a36008bfa1aadf55f516bcbfae40c779eb54b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Sep 2023 01:27:57 +0200 Subject: hwrng: bcm2835 - Fix hwrng throughput regression The last RCU stall fix caused a massive throughput regression of the hwrng on Raspberry Pi 0 - 3. hwrng_msleep doesn't sleep precisely enough and usleep_range doesn't allow scheduling. So try to restore the best possible throughput by introducing hwrng_yield which interruptable sleeps for one jiffy. Some performance measurements on Raspberry Pi 3B+ (arm64/defconfig): sudo dd if=/dev/hwrng of=/dev/null count=1 bs=10000 cpu_relax ~138025 Bytes / sec hwrng_msleep(1000) ~13 Bytes / sec hwrng_yield ~2510 Bytes / sec Fixes: 96cb9d055445 ("hwrng: bcm2835 - use hwrng_msleep() instead of cpu_relax()") Link: https://lore.kernel.org/linux-arm-kernel/bc97ece5-44a3-4c4e-77da-2db3eb66b128@gmx.net/ Signed-off-by: Stefan Wahren Reviewed-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- include/linux/hw_random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8a3115516a1b..136e9842120e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); +extern long hwrng_yield(struct hwrng *rng); #endif /* LINUX_HWRANDOM_H_ */ -- cgit v1.2.3 From b1f099b1cf51d553c510c6c8141c27d9ba7ea1fe Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 19 Aug 2023 07:12:33 -0700 Subject: numa: Generalize numa_map_to_online_node() The function in fact searches the nearest node for a given one, based on a N_ONLINE state. This is a common pattern to search for a nearest node. This patch converts numa_map_to_online_node() to numa_nearest_node() so that others won't need to opencode the logic. Signed-off-by: Yury Norov Signed-off-by: Ingo Molnar Cc: Mel Gorman Link: https://lore.kernel.org/r/20230819141239.287290-2-yury.norov@gmail.com --- include/linux/numa.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..fb30a42f0700 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -25,7 +25,7 @@ #include /* Generic implementation available */ -int numa_map_to_online_node(int node); +int numa_nearest_node(int node, unsigned int state); #ifndef memory_add_physaddr_to_nid static inline int memory_add_physaddr_to_nid(u64 start) @@ -44,10 +44,11 @@ static inline int phys_to_target_node(u64 start) } #endif #else /* !CONFIG_NUMA */ -static inline int numa_map_to_online_node(int node) +static inline int numa_nearest_node(int node, unsigned int state) { return NUMA_NO_NODE; } + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; @@ -58,6 +59,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) + #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP extern const struct attribute_group arch_node_dev_group; #endif -- cgit v1.2.3 From 8ab63d418d4339d996f80d02a00dbce0aa3ff972 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 19 Aug 2023 07:12:36 -0700 Subject: sched/topology: Fix sched_numa_find_nth_cpu() in non-NUMA case When CONFIG_NUMA is enabled, sched_numa_find_nth_cpu() searches for a CPU in sched_domains_numa_masks. The masks includes only online CPUs, so effectively offline CPUs are skipped. When CONFIG_NUMA is disabled, the fallback function should be consistent. Fixes: cd7f55359c90 ("sched: add sched_numa_find_nth_cpu()") Signed-off-by: Yury Norov Signed-off-by: Ingo Molnar Cc: Mel Gorman Link: https://lore.kernel.org/r/20230819141239.287290-5-yury.norov@gmail.com --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index fea32377f7c7..52f5850730b3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { - return cpumask_nth(cpu, cpus); + return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * -- cgit v1.2.3 From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 10:34:27 -0700 Subject: bpf: Fix BTF_ID symbol generation collision Marcus and Satya reported an issue where BTF_ID macro generates same symbol in separate objects and that breaks final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on better solution as suggested by Andrii. Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Jiri Olsa Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a3462a9b8e18..a9cb10b0e2e9 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -49,7 +49,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing -- cgit v1.2.3 From 32e4fa37fa667fdf53499b9de92737dc75199d8e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 4 Sep 2023 14:13:50 +0200 Subject: cpu/hotplug: Remove unused cpuhp_state CPUHP_AP_X86_VDSO_VMA_ONLINE Commit b2e2ba578e01 ("x86/vdso: Initialize the CPU/node NR segment descriptor earlier") removed the single user of this constant. Remove it to reduce the size of cpuhp_hp_states[]. Signed-off-by: Olaf Hering Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230904121350.18055-1-olaf@aepfle.de --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 06dda85f0424..cd8bd6ed04f9 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -205,7 +205,6 @@ enum cpuhp_state { CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, -- cgit v1.2.3 From c656a4d5484ad99e97de549a9affc12a91d94963 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Sep 2023 15:46:08 -0400 Subject: Revert "SUNRPC: clean up integer overflow check" This reverts commit e87cf8a28e7592bd19064e8181324ae26bc02932. This commit was added to silence a tautological comparison warning, but removing the 'len' value check before calling xdr_inline_decode() is really not what we want. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5b4fb3c791bc..896a6d2a9cf0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,9 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; if (array == NULL) -- cgit v1.2.3 From 993b5662f302628db4eb358d69b2720c88cbfaf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Sep 2023 16:12:33 -0400 Subject: SUNRPC: Silence compiler complaints about tautological comparisons On 64-bit systems, the compiler will complain that the comparison between SIZE_MAX and the 32-bit unsigned int 'len' is unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 896a6d2a9cf0..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) -- cgit v1.2.3 From daabb2b098e04753fa3d1b1feed13e5a61bef61c Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:49 +0000 Subject: bpf/tests: add tests for cpuv4 instructions The BPF JITs now support cpuv4 instructions. Add tests for these new instructions to the test suite: 1. Sign extended Load 2. Sign extended Mov 3. Unconditional byte swap 4. Unconditional jump with 32-bit offset 5. Signed division and modulo Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20230907230550.1417590-9-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2b..0138832ad571 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -117,21 +117,25 @@ struct ctl_table_header; /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -143,6 +147,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -179,6 +193,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ -- cgit v1.2.3 From 3868ab0f192581eff978501a05f3dc2e01541d77 Mon Sep 17 00:00:00 2001 From: Aananth V Date: Thu, 14 Sep 2023 14:36:21 +0000 Subject: tcp: new TCP_INFO stats for RTO events The 2023 SIGCOMM paper "Improving Network Availability with Protective ReRoute" has indicated Linux TCP's RTO-triggered txhash rehashing can effectively reduce application disruption during outages. To better measure the efficacy of this feature, this patch adds three more detailed stats during RTO recovery and exports via TCP_INFO. Applications and monitoring systems can leverage this data to measure the network path diversity and end-to-end repair latency during network outages to improve their network infrastructure. The following counters are added to tcp_sock in order to track RTO events over the lifetime of a TCP socket. 1. u16 total_rto - Counts the total number of RTO timeouts. 2. u16 total_rto_recoveries - Counts the total number of RTO recoveries. 3. u32 total_rto_time - Counts the total time spent (ms) in RTO recoveries. (time spent in CA_Loss and CA_Recovery states) To compute total_rto_time, we add a new u32 rto_stamp field to tcp_sock. rto_stamp records the start timestamp (ms) of the last RTO recovery (CA_Loss). Corresponding fields are also added to the tcp_info struct. Signed-off-by: Aananth V Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 44d946161d4a..e15452df9804 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -377,6 +377,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ -- cgit v1.2.3 From 9af27da6313c8f8c6a26c7ea3fe23d6b9664a3a8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:31:58 +0200 Subject: bpf: Use bpf_is_subprog to check for subprogs We would like to know whether a bpf_prog corresponds to the main prog or one of the subprogs. The current JIT implementations simply check this using the func_idx in bpf_prog->aux->func_idx. When the index is 0, it belongs to the main program, otherwise it corresponds to some subprogram. This will also be necessary to halt exception propagation while walking the stack when an exception is thrown, so we add a simple helper function to check this, named bpf_is_subprog, and convert existing JIT implementations to also make use of it. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9e573159432..9171b0b6a590 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3194,4 +3194,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From fd5d27b70188379bb441d404c29a0afb111e1753 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:31:59 +0200 Subject: arch/x86: Implement arch_bpf_stack_walk The plumbing for offline unwinding when we throw an exception in programs would require walking the stack, hence introduce a new arch_bpf_stack_walk function. This is provided when the JIT supports exceptions, i.e. bpf_jit_supports_exceptions is true. The arch-specific code is really minimal, hence it should be straightforward to extend this support to other architectures as well, as it reuses the logic of arch_stack_walk, but allowing access to unwind_state data. Once the stack pointer and frame pointer are known for the main subprog during the unwinding, we know the stack layout and location of any callee-saved registers which must be restored before we return back to the kernel. This handling will be added in the subsequent patches. Note that while we primarily unwind through BPF frames, which are effectively CONFIG_UNWINDER_FRAME_POINTER, we still need one of this or CONFIG_UNWINDER_ORC to be able to unwind through the bpf_throw frame from which we begin walking the stack. We also require both sp and bp (stack and frame pointers) from the unwind_state structure, which are only available when one of these two options are enabled. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0138832ad571..88874de974cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -954,6 +954,8 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) -- cgit v1.2.3 From 335d1c5b545284d75ef96ee42e461eacefe865bb Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:00 +0200 Subject: bpf: Implement support for adding hidden subprogs Introduce support in the verifier for generating a subprogram and include it as part of a BPF program dynamically after the do_check phase is complete. The first user will be the next patch which generates default exception callbacks if none are set for the program. The phase of invocation will be do_misc_fixups. Note that this is an internal verifier function, and should be used with instruction blocks which uphold the invariants stated in check_subprogs. Since these subprogs are always appended to the end of the instruction sequence of the program, it becomes relatively inexpensive to do the related adjustments to the subprog_info of the program. Only the fake exit subprogram is shifted forward, making room for our new subprog. This is useful to insert a new subprogram, get it JITed, and obtain its function pointer. The next patch will use this functionality to insert a default exception callback which will be invoked after unwinding the stack. Note that these added subprograms are invisible to userspace, and never reported in BPF_OBJ_GET_INFO_BY_ID etc. For now, only a single subprogram is supported, but more can be easily supported in the future. To this end, two function counts are introduced now, the existing func_cnt, and real_func_cnt, the latter including hidden programs. This allows us to conver the JIT code to use the real_func_cnt for management of resources while syscall path continues working with existing func_cnt. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9171b0b6a590..c3667e95af59 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1389,6 +1389,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a3236651ec64..3c2a8636ab29 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -588,6 +588,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -598,7 +599,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; -- cgit v1.2.3 From f18b03fabaa9b7c80e80b72a621f481f0d706ae0 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:01 +0200 Subject: bpf: Implement BPF exceptions This patch implements BPF exceptions, and introduces a bpf_throw kfunc to allow programs to throw exceptions during their execution at runtime. A bpf_throw invocation is treated as an immediate termination of the program, returning back to its caller within the kernel, unwinding all stack frames. This allows the program to simplify its implementation, by testing for runtime conditions which the verifier has no visibility into, and assert that they are true. In case they are not, the program can simply throw an exception from the other branch. BPF exceptions are explicitly *NOT* an unlikely slowpath error handling primitive, and this objective has guided design choices of the implementation of the them within the kernel (with the bulk of the cost for unwinding the stack offloaded to the bpf_throw kfunc). The implementation of this mechanism requires use of add_hidden_subprog mechanism introduced in the previous patch, which generates a couple of instructions to move R1 to R0 and exit. The JIT then rewrites the prologue of this subprog to take the stack pointer and frame pointer as inputs and reset the stack frame, popping all callee-saved registers saved by the main subprog. The bpf_throw function then walks the stack at runtime, and invokes this exception subprog with the stack and frame pointers as parameters. Reviewers must take note that currently the main program is made to save all callee-saved registers on x86_64 during entry into the program. This is because we must do an equivalent of a lightweight context switch when unwinding the stack, therefore we need the callee-saved registers of the caller of the BPF program to be able to return with a sane state. Note that we have to additionally handle r12, even though it is not used by the program, because when throwing the exception the program makes an entry into the kernel which could clobber r12 after saving it on the stack. To be able to preserve the value we received on program entry, we push r12 and restore it from the generated subprogram when unwinding the stack. For now, bpf_throw invocation fails when lingering resources or locks exist in that path of the program. In a future followup, bpf_throw will be extended to perform frame-by-frame unwinding to release lingering resources for each stack frame, removing this limitation. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/linux/bpf_verifier.h | 4 ++++ include/linux/filter.h | 6 ++++++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3667e95af59..16740ee82082 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1410,6 +1410,8 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1432,6 +1434,7 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp); #ifdef CONFIG_SECURITY void *security; #endif diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3c2a8636ab29..da21a3ec5027 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -541,7 +541,9 @@ struct bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_cb; bool is_async_cb; + bool is_exception_cb; }; struct bpf_verifier_env; @@ -589,6 +591,7 @@ struct bpf_verifier_env { u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -596,6 +599,7 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; diff --git a/include/linux/filter.h b/include/linux/filter.h index 88874de974cb..27406aee2d40 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1171,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1238,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) -- cgit v1.2.3 From b9ae0c9dd0aca79bffc17be51c2dc148d1f72708 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:03 +0200 Subject: bpf: Add support for custom exception callbacks By default, the subprog generated by the verifier to handle a thrown exception hardcodes a return value of 0. To allow user-defined logic and modification of the return value when an exception is thrown, introduce the 'exception_callback:' declaration tag, which marks a callback as the default exception handler for the program. The format of the declaration tag is 'exception_callback:', where is the name of the exception callback. Each main program can be tagged using this BTF declaratiion tag to associate it with an exception callback. In case the tag is absent, the default callback is used. As such, the exception callback cannot be modified at runtime, only set during verification. Allowing modification of the callback for the current program execution at runtime leads to issues when the programs begin to nest, as any per-CPU state maintaing this information will have to be saved and restored. We don't want it to stay in bpf_prog_aux as this takes a global effect for all programs. An alternative solution is spilling the callback pointer at a known location on the program stack on entry, and then passing this location to bpf_throw as a parameter. However, since exceptions are geared more towards a use case where they are ideally never invoked, optimizing for this use case and adding to the complexity has diminishing returns. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 +++- include/linux/bpf_verifier.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 16740ee82082..30063a760b5a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2422,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); + struct bpf_reg_state *reg, bool is_ex_cb); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index da21a3ec5027..94ec766432f5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -300,6 +300,7 @@ struct bpf_func_state { bool in_callback_fn; struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; /* The following fields should be last. See copy_func_state() */ int acquired_refs; -- cgit v1.2.3 From 7ccb84f04cda1dd6f64f352e9795db308e9cdc0c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:06 +0200 Subject: mm: kasan: Declare kasan_unpoison_task_stack_below in kasan.h We require access to this kasan helper in BPF code in the next patch where we have to unpoison the task stack when we unwind and reset the stack frame from bpf_throw, and it never really unpoisons the poisoned stack slots on entry when compiler instrumentation is generated by CONFIG_KASAN_STACK and inline instrumentation is supported. Also, remove the declaration from mm/kasan/kasan.h as we put it in the header file kasan.h. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Suggested-by: Andrey Konovalov Signed-off-by: Kumar Kartikeya Dwivedi Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20230912233214.1518551-10-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..7a463f814db2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -283,8 +283,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC -- cgit v1.2.3 From 12e94aee074ce1c5ffdb8f2246a8c4a095b6aa8a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 Sep 2023 15:39:00 +0200 Subject: power: supply: core: Don't export power_supply_notifier power_supply_notifier can be internal, since all users are going through power_supply_reg_notifier()/power_supply_unreg_notifier(). Link: https://lore.kernel.org/r/20230913133900.591637-1-sebastian.reichel@collabora.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 85b86768c0b9..c0992a77feea 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -767,7 +767,6 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct blocking_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) -- cgit v1.2.3 From 9431063ad323ac864750aeba4d304389bc42ca4e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 13 Sep 2023 21:49:37 +0100 Subject: dpll: core: Add DPLL framework base functions DPLL framework is used to represent and configure DPLL devices in systems. Each device that has DPLL and can configure inputs and outputs can use this framework. Implement core framework functions for further interactions with device drivers implementing dpll subsystem, as well as for interactions of DPLL netlink framework part with the subsystem itself. Co-developed-by: Milena Olech Signed-off-by: Milena Olech Co-developed-by: Michal Michalik Signed-off-by: Michal Michalik Signed-off-by: Vadim Fedorenko Co-developed-by: Arkadiusz Kubalewski Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/dpll.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 include/linux/dpll.h (limited to 'include/linux') diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..b47c3560b937 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include +#include +#include + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_mode mode, + struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; +}; + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +#endif -- cgit v1.2.3 From 9d71b54b65b1fb6c0d3a6c5c88ba9b915c783fbc Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 13 Sep 2023 21:49:38 +0100 Subject: dpll: netlink: Add DPLL framework base functions DPLL framework is used to represent and configure DPLL devices in systems. Each device that has DPLL and can configure inputs and outputs can use this framework. Implement dpll netlink framework functions for enablement of dpll subsystem netlink family. Co-developed-by: Milena Olech Signed-off-by: Milena Olech Co-developed-by: Michal Michalik Signed-off-by: Michal Michalik Signed-off-by: Vadim Fedorenko Co-developed-by: Arkadiusz Kubalewski Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/dpll.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b47c3560b937..2202310c10cd 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -130,4 +130,8 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, const struct dpll_pin_ops *ops, void *priv); +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + #endif -- cgit v1.2.3 From 5f18426928800c59fb0f9bc8fb0c182bb6f5ee24 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 21:49:39 +0100 Subject: netdev: expose DPLL pin handle for netdevice In case netdevice represents a SyncE port, the user needs to understand the connection between netdevice and associated DPLL pin. There might me multiple netdevices pointing to the same pin, in case of VF/SF implementation. Add a IFLA Netlink attribute to nest the DPLL pin handle, similar to how it is implemented for devlink port. Add a struct dpll_pin pointer to netdev and protect access to it by RTNL. Expose netdev_dpll_pin_set() and netdev_dpll_pin_clear() helpers to the drivers so they can set/clear the DPLL pin relationship to netdev. Note that during the lifetime of struct dpll_pin the pin handle does not change. Therefore it is save to access it lockless. It is drivers responsibility to call netdev_dpll_pin_clear() before dpll_pin_put(). Signed-off-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/linux/dpll.h | 15 +++++++++++++++ include/linux/netdevice.h | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 2202310c10cd..bbc480cd2932 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -101,6 +101,21 @@ struct dpll_pin_properties { struct dpll_pin_frequency *freq_supported; }; +#if IS_ENABLED(CONFIG_DPLL) +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +#else +static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return 0; +} + +static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + return 0; +} +#endif + struct dpll_device * dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0896aaa91dd7..db3d8429d50d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,6 +79,8 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +/* DPLL specific */ +struct dpll_pin; typedef u32 xdp_features_t; @@ -2049,6 +2051,9 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2405,6 +2410,10 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin *dpll_pin; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3940,6 +3949,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void netdev_dpll_pin_clear(struct net_device *dev); + +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_DPLL) + return dev->dpll_pin; +#else + return NULL; +#endif +} + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit v1.2.3 From 496fd0a26bbf73b6b12407ee4fbe5ff49d659a6d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 21:49:43 +0100 Subject: mlx5: Implement SyncE support using DPLL infrastructure Implement SyncE support using newly introduced DPLL support. Make sure that each PFs/VFs/SFs probed with appropriate capability will spawn a dpll auxiliary device and register appropriate dpll device and pin instances. Signed-off-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 59 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..92434814c855 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -155,6 +155,8 @@ enum { MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..dd8421d021cf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10176,7 +10176,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -12549,4 +12551,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From ac5f395685bd16ca30c1c834dcbf8b555605ccae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:40 +0200 Subject: net/mlx5: SF, Implement peer devlink set for SF representor devlink port Benefit from the existence of internal mlx5 notifier and extend it by event MLX5_DRIVER_EVENT_SF_PEER_DEVLINK. Use this event from SF auxiliary device probe/remove functions to pass the registered SF devlink instance to the SF representor. Process the new event in SF representor code and call devl_port_fn_devlink_set() to do the assignments. Implement this in work to avoid possible deadlock when probe/remove function of SF may be called with devlink instance lock held during devlink reload. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..8fbe22de16ef 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, }; enum { -- cgit v1.2.3 From 86328b338c3996b814417dd68e3f899a1a649059 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 12 Sep 2023 13:59:49 +0530 Subject: vmcore: remove dependency with is_kdump_kernel() for exporting vmcore Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set. While elfcorehdr_addr is set for kexec based kernel dump mechanism, alternate dump capturing methods like fadump [1] also set it to export the vmcore. Since, is_kdump_kernel() is used to restrict resources in crash dump capture kernel and such restrictions may not be desirable for fadump, allow is_kdump_kernel() to be defined differently for such scenarios. With this, is_kdump_kernel() could be false while vmcore is usable. So, remove unnecessary dependency with is_kdump_kernel(), for exporting vmcore. [1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html Suggested-by: Michael Ellerman Signed-off-by: Hari Bathini Acked-by: Baoquan He Signed-off-by: Michael Ellerman Link: https://msgid.link/20230912082950.856977-1-hbathini@linux.ibm.com --- include/linux/crash_dump.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** -- cgit v1.2.3 From fbaa6a181a4b1886cbf4214abdf9a2df68471510 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 8 Sep 2023 15:49:15 -0700 Subject: sched/core: Remove ifdeffery for saved_state In preparation for freezer to also use saved_state, remove the CONFIG_PREEMPT_RT compilation guard around saved_state. On the arm64 platform I tested which did not have CONFIG_PREEMPT_RT, there was no statistically significant deviation by applying this patch. Test methodology: perf bench sched message -g 40 -l 40 Signed-off-by: Elliot Berman Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..dc37ae787e33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -750,10 +750,8 @@ struct task_struct { #endif unsigned int __state; -#ifdef CONFIG_PREEMPT_RT /* saved state for "spinlock sleepers" */ unsigned int saved_state; -#endif /* * This begins the randomizable portion of task_struct. Only -- cgit v1.2.3 From f7b5bd725b737de3f2c4a836e07c82ba156d75df Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 14 Sep 2023 15:31:57 -0700 Subject: pds_core: check health in devcmd wait Similar to what we do in the AdminQ, check for devcmd health while waiting for an answer. Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Signed-off-by: David S. Miller --- include/linux/pds/pds_core_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** -- cgit v1.2.3 From b0af4bcb49464c221ad5f95d40f2b1b252ceedcc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:43:18 +0206 Subject: serial: core: Provide port lock wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure. Provide wrapper functions for spin_[un]lock*(port->lock) invocations so that the console mechanics can be applied later on at a single place and does not require to copy the same logic all over the drivers. Signed-off-by: Thomas Gleixner Reviewed-by: Ilpo Järvinen Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-2-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 79 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index bb6f073bc159..f1d5c0d1568c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -588,6 +588,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_lock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From c5cbdb76e8e33ce90fec2946e8eee7d71d68e57a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:43:19 +0206 Subject: serial: core: Use lock wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure. To avoid adding this functionality to all UART drivers, wrap the spin_[un]lock*() invocations for uart_port::lock into helper functions which just contain the spin_[un]lock*() invocations for now. In a subsequent step these helpers will gain the console synchronization mechanisms. Converted with coccinelle. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ilpo Järvinen Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-3-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index f1d5c0d1568c..3091c62ec37b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -1035,14 +1035,14 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock(&port->lock); + uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock(&port->lock); + uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -1054,14 +1054,14 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -1077,12 +1077,12 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock(&port->lock); + uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ -- cgit v1.2.3 From 1cb6422ecac8804ebe0b71f4b3440674955fec73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:15:10 -0700 Subject: ceph: Annotate struct ceph_monmap with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ceph_monmap. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Ilya Dryomov Cc: Xiubo Li Cc: Jeff Layton Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Xiubo Li Signed-off-by: David S. Miller --- include/linux/ceph/mon_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; -- cgit v1.2.3 From d57125b55a292a8e74a1fb17182576a3b2b2e795 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Sep 2023 10:44:08 +0200 Subject: Revert "ceph: make members in struct ceph_mds_request_args_ext a union" This reverts commit 3af5ae22030cb59fab4fba35f5a2b62f47e14df9. ceph_mds_request_args_ext was already (and remains to be) a union. An additional anonymous union inside is bogus: union ceph_mds_request_args_ext { union { union ceph_mds_request_args old; struct { ... } __attribute__ ((packed)) setattr_ext; }; } Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li --- include/linux/ceph/ceph_fs.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f2301ee88bc..f3b3593254b9 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -467,19 +467,17 @@ union ceph_mds_request_args { } __attribute__ ((packed)); union ceph_mds_request_args_ext { - union { - union ceph_mds_request_args old; - struct { - __le32 mode; - __le32 uid; - __le32 gid; - struct ceph_timespec mtime; - struct ceph_timespec atime; - __le64 size, old_size; /* old_size needed by truncate */ - __le32 mask; /* CEPH_SETATTR_* */ - struct ceph_timespec btime; - } __attribute__ ((packed)) setattr_ext; - }; + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; }; #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -- cgit v1.2.3 From 8452a05b2c633b708dbe3e742f71b24bf21fe42d Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 16 Sep 2023 14:33:12 +0800 Subject: net: stmmac: Tx coe sw fallback Add sw fallback of tx checksum calculation for those tx queues that don't support tx checksum offloading. DW xGMAC IP can be synthesized such that it can support tx checksum offloading only for a few initial tx queues. Also as Serge pointed out, for the DW QoS IP, tx coe can be individually configured for each tx queue. So when tx coe is enabled, for any tx queue that doesn't support tx coe with 'coe-unsupported' flag set will have a sw fallback happen in the driver for tx checksum calculation when any packets to be transmitted on these tx queues. Signed-off-by: Rohan G Thomas Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ce89cc3e4913..c0079a7574ae 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -139,6 +139,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; -- cgit v1.2.3 From 6b93bb41f6eaa1cc5c5f30ec3b687b380f116cd0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:00 +0206 Subject: printk: Add non-BKL (nbcon) console basic infrastructure The current console/printk subsystem is protected by a Big Kernel Lock, (aka console_lock) which has ill defined semantics and is more or less stateless. This puts severe limitations on the console subsystem and makes forced takeover and output in emergency and panic situations a fragile endeavour that is based on try and pray. The goal of non-BKL (nbcon) consoles is to break out of the console lock jail and to provide a new infrastructure that avoids the pitfalls and also allows console drivers to be gradually converted over. The proposed infrastructure aims for the following properties: - Per console locking instead of global locking - Per console state that allows to make informed decisions - Stateful handover and takeover As a first step, state is added to struct console. The per console state is an atomic_t using a 32bit bit field. Reserve state bits, which will be populated later in the series. Wire it up into the console register/unregister functionality. It was decided to use a bitfield because using a plain u32 with mask/shift operations resulted in uncomprehensible code. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-2-john.ogness@linutronix.de --- include/linux/console.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 7de11c763eb3..a2d37a7a98a8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -156,6 +156,8 @@ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -166,8 +168,32 @@ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), }; +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -187,6 +213,8 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @nbcon_state: State for nbcon consoles */ struct console { char name[16]; @@ -206,6 +234,9 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + atomic_t __private nbcon_state; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 3a5bb25162b880da749f7cdf281c78dbade4164b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:01 +0206 Subject: printk: nbcon: Add acquire/release logic Add per console acquire/release functionality. The state of the console is maintained in the "nbcon_state" atomic variable. The console is locked when: - The 'prio' field contains the priority of the context that owns the console. Only higher priority contexts are allowed to take over the lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. - The 'cpu' field denotes on which CPU the console is locked. It is used to prevent busy waiting on the same CPU. Also it informs the lock owner that it has lost the lock in a more complex scenario when the lock was taken over by a higher priority context, released, and taken on another CPU with the same priority as the interrupted owner. The acquire mechanism uses a few more fields: - The 'req_prio' field is used by the handover approach to make the current owner aware that there is a context with a higher priority waiting for the friendly handover. - The 'unsafe' field allows to take over the console in a safe way in the middle of emitting a message. The field is set only when accessing some shared resources or when the console device is manipulated. It can be cleared, for example, after emitting one character when the console device is in a consistent state. - The 'unsafe_takeover' field is set when a hostile takeover took the console in an unsafe state. The console will stay in the unsafe state until re-initialized. The acquire mechanism uses three approaches: 1) Direct acquire when the console is not owned or is owned by a lower priority context and is in a safe state. 2) Friendly handover mechanism uses a request/grant handshake. It is used when the current owner has lower priority and the console is in an unsafe state. The requesting context: a) Sets its priority into the 'req_prio' field. b) Waits (with a timeout) for the owning context to unlock the console. c) Takes the lock and clears the 'req_prio' field. The owning context: a) Observes the 'req_prio' field set on exit from the unsafe console state. b) Gives up console ownership by clearing the 'prio' field. 3) Unsafe hostile takeover allows to take over the lock even when the console is an unsafe state. It is used only in panic() by the final attempt to flush consoles in a try and hope mode. Note that separate record buffers are used in panic(). As a result, the messages can be read and formatted without any risk even after using the hostile takeover in unsafe state. The release function simply clears the 'prio' field. All operations on @console::nbcon_state are atomic cmpxchg based to handle concurrency. The acquire/release functions implement only minimal policies: - Preference for higher priority contexts. - Protection of the panic CPU. All other policy decisions must be made at the call sites: - What is marked as an unsafe section. - Whether to spin-wait if there is already an owner and the console is in an unsafe state. - Whether to attempt an unsafe hostile takeover. The design allows to implement the well known: acquire() output_one_printk_record() release() The output of one printk record might be interrupted with a higher priority context. The new owner is supposed to reprint the entire interrupted record from scratch. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-3-john.ogness@linutronix.de --- include/linux/console.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index a2d37a7a98a8..98210fd01f18 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -175,13 +175,29 @@ enum cons_flags { * struct nbcon_state - console state for nbcon consoles * @atom: Compound of the state fields for atomic operations * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * * To be used for reading and preparing of the value stored in the nbcon * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. */ struct nbcon_state { union { unsigned int atom; struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; }; }; }; @@ -194,6 +210,46 @@ struct nbcon_state { */ static_assert(sizeof(struct nbcon_state) <= sizeof(int)); +/** + * nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) + * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver -- cgit v1.2.3 From 5634c90fd8553de7cbafecd048d0273690a2e84e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:03 +0206 Subject: printk: nbcon: Add buffer management In case of hostile takeovers it must be ensured that the previous owner cannot scribble over the output buffer of the emergency/panic context. This is achieved by: - Adding a global output buffer instance for the panic context. This is the only situation where hostile takeovers can occur and there is always at most 1 panic context. - Allocating an output buffer per non-boot console upon console registration. This buffer is used by the console owner when not in panic context. (For boot consoles, the existing shared global legacy output buffer is used instead. Boot console printing will be synchronized with legacy console printing.) - Choosing the appropriate buffer is handled in the acquire/release functions. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-5-john.ogness@linutronix.de --- include/linux/console.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 98210fd01f18..ca1ef8700e55 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -231,6 +231,7 @@ enum nbcon_prio { }; struct console; +struct printk_buffers; /** * struct nbcon_context - Context for console acquire/release @@ -241,6 +242,7 @@ struct console; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. + * @pbufs: Pointer to the text buffer for this context */ struct nbcon_context { /* members set by caller */ @@ -248,6 +250,9 @@ struct nbcon_context { unsigned int spinwait_max_us; enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; }; /** @@ -271,6 +276,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -293,6 +299,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From ad56ebd1d79b216dc147474fac89a11daf6b10df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:05 +0206 Subject: printk: nbcon: Add sequence handling Add an atomic_long_t field @nbcon_seq to the console struct to store the sequence number for nbcon consoles. For nbcon consoles this will be used instead of the non-atomic @seq field. The new field allows for safe atomic sequence number updates without requiring any locking. On 64bit systems the new field stores the full sequence number. On 32bit systems the new field stores the lower 32 bits of the sequence number, which are expanded to 64bit as needed by folding the values based on the sequence numbers available in the ringbuffer. For 32bit systems, having a 32bit representation in the console is sufficient. If a console ever gets more than 2^31 records behind the ringbuffer then this is the least of the problems. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-7-john.ogness@linutronix.de --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ca1ef8700e55..20cd486b76ad 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -243,6 +243,7 @@ struct printk_buffers; * might cause a system freeze when the console * is used later. * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context */ struct nbcon_context { /* members set by caller */ @@ -253,6 +254,7 @@ struct nbcon_context { /* members set by acquire */ struct printk_buffers *pbufs; + u64 seq; }; /** @@ -276,6 +278,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -299,6 +302,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; }; -- cgit v1.2.3 From 06653d57ff283be627a2c769139d73ecc487810f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:06 +0206 Subject: printk: nbcon: Add emit function and callback function for atomic printing Implement an emit function for nbcon consoles to output printk messages. It utilizes the lockless printk_get_next_message() and console_prepend_dropped() functions to retrieve/build the output message. The emit function includes the required safety points to check for handover/takeover and calls a new write_atomic callback of the console driver to output the message. It also includes proper handling for updating the nbcon console sequence number. A new nbcon_write_context struct is introduced. This is provided to the write_atomic callback and includes only the information necessary for performing atomic writes. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-8-john.ogness@linutronix.de --- include/linux/console.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 20cd486b76ad..14563dcb34b1 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -242,6 +242,7 @@ struct printk_buffers; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. + * @backlog: Ringbuffer has pending records * @pbufs: Pointer to the text buffer for this context * @seq: The sequence number to print for this context */ @@ -252,11 +253,28 @@ struct nbcon_context { enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + /* members set by emit */ + unsigned int backlog : 1; + /* members set by acquire */ struct printk_buffers *pbufs; u64 seq; }; +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -277,6 +295,7 @@ struct nbcon_context { * @data: Driver private data * @node: hlist node for the console list * + * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -301,6 +320,8 @@ struct console { struct hlist_node node; /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 9757acd0a700ba4a0d16dde4ba820eb052aba1a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:07 +0206 Subject: printk: nbcon: Allow drivers to mark unsafe regions and check state For the write_atomic callback, the console driver may have unsafe regions that need to be appropriately marked. Provide functions that accept the nbcon_write_context struct to allow for the driver to enter and exit unsafe regions. Also provide a function for drivers to check if they are still the owner of the console. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-9-john.ogness@linutronix.de --- include/linux/console.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 14563dcb34b1..e4fc6f7c1496 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -451,6 +451,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; -- cgit v1.2.3 From 492032760127251e5540a5716a70996bacf2a3fd Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 18 Sep 2023 20:30:11 +0800 Subject: team: fix null-ptr-deref when team device type is changed Get a null-ptr-deref bug as follows with reproducer [1]. BUG: kernel NULL pointer dereference, address: 0000000000000228 ... RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q] ... Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x150 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? vlan_dev_hard_header+0x35/0x140 [8021q] ? vlan_dev_hard_header+0x8e/0x140 [8021q] neigh_connected_output+0xb2/0x100 ip6_finish_output2+0x1cb/0x520 ? nf_hook_slow+0x43/0xc0 ? ip6_mtu+0x46/0x80 ip6_finish_output+0x2a/0xb0 mld_sendpack+0x18f/0x250 mld_ifc_work+0x39/0x160 process_one_work+0x1e6/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [1] $ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}' $ ip link add name t-dummy type dummy $ ip link add link t-dummy name t-dummy.100 type vlan id 100 $ ip link add name t-nlmon type nlmon $ ip link set t-nlmon master team0 $ ip link set t-nlmon nomaster $ ip link set t-dummy up $ ip link set team0 up $ ip link set t-dummy.100 down $ ip link set t-dummy.100 master team0 When enslave a vlan device to team device and team device type is changed from non-ether to ether, header_ops of team device is changed to vlan_header_ops. That is incorrect and will trigger null-ptr-deref for vlan->real_dev in vlan_dev_hard_header() because team device is not a vlan device. Cache eth_header_ops in team_setup(), then assign cached header_ops to header_ops of team net device when its type is changed from non-ether to ether to fix the bug. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Ziyang Xuan Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- include/linux/if_team.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* -- cgit v1.2.3 From 6a23c555f7eb436d6799533675ffa179db3d5834 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 18 Sep 2023 14:25:36 +0100 Subject: net: phy: fix regression with AX88772A PHY driver Marek reports that a deadlock occurs with the AX88772A PHY used on the ASIX USB network driver: asix 1-1.4:1.0 (unnamed net_device) (uninitialized): PHY [usb-001:003:10] driver [Asix Electronics AX88772A] (irq=POLL) Asix Electronics AX88772A usb-001:003:10: attached PHY driver(mii_bus:phy_addr=usb-001:003:10, irq=POLL) asix 1-1.4:1.0 eth0: register 'asix' at usb-12110000.usb-1.4, ASIX AX88772 USB 2.0 Ethernet, a2:99:b6:cd:11:eb asix 1-1.4:1.0 eth0: configuring for phy/internal link mode ============================================ WARNING: possible recursive locking detected 6.6.0-rc1-00239-g8da77df649c4-dirty #13949 Not tainted -------------------------------------------- kworker/3:3/71 is trying to acquire lock: c6c704cc (&dev->lock){+.+.}-{3:3}, at: phy_start_aneg+0x1c/0x38 but task is already holding lock: c6c704cc (&dev->lock){+.+.}-{3:3}, at: phy_state_machine+0x100/0x2b8 This is because we now consistently call phy_process_state_change() while holding phydev->lock, but the AX88772A PHY driver then goes on to call phy_start_aneg() which tries to grab the same lock - causing deadlock. Fix this by exporting the unlocked version, and use this in the PHY driver instead. Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Fixes: ef113a60d0a9 ("net: phy: call phy_error_precise() while holding the lock") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1qiEFs-007g7b-Lq@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1351b802ffcf..3cc52826f18e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1736,6 +1736,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); -- cgit v1.2.3 From fa17a6d8a5bd0cd7565b613cb804242cd0f6b7ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 14:23:21 +0000 Subject: ipv6: lockless IPV6_ADDR_PREFERENCES implementation We have data-races while reading np->srcprefs Switch the field to a plain byte, add READ_ONCE() and WRITE_ONCE() annotations where needed, and IPV6_ADDR_PREFERENCES setsockopt() can now be lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230918142321.1794107-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 09253825c99c..e400ff757f13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,7 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ -- cgit v1.2.3 From bafd764a8baa87e19e6beeaa58eb85fcbbdd6b20 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Sep 2023 12:29:07 +0200 Subject: net: ethernet: mtk_wed: rename mtk_rxbm_desc in mtk_wed_bm_desc Rename mtk_rxbm_desc structure in mtk_wed_bm_desc since it will be used even on tx side by MT7988 SoC. Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..c6512c216b27 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -45,7 +45,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } __packed __aligned(4); @@ -104,7 +104,7 @@ struct mtk_wed_device { struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; -- cgit v1.2.3 From ff0ea57fa30e860d3373acd1383e9d9599144b58 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Sep 2023 12:29:08 +0200 Subject: net: ethernet: mtk_wed: introduce mtk_wed_buf structure Introduce mtk_wed_buf structure to store both virtual and physical addresses allocated in mtk_wed_tx_buffer_alloc() routine. This is a preliminary patch to add WED support for MT7988 SoC since it relies on a different dma descriptor layout not storing page dma addresses. Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index c6512c216b27..5f00dc26582b 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -76,6 +76,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -97,7 +102,7 @@ struct mtk_wed_device { struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; -- cgit v1.2.3 From e2f64db13aa1d08e32621067e4fe16bbc114b375 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 12:29:13 +0200 Subject: net: ethernet: mtk_wed: introduce WED support for MT7988 Similar to MT7986 and MT7622, enable Wireless Ethernet Ditpatcher for MT7988 in order to offload traffic forwarded from LAN/WLAN to WLAN/LAN Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 5f00dc26582b..5b096f9f1975 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -138,6 +138,8 @@ struct mtk_wed_device { u32 wpdma_rx; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; @@ -211,10 +213,12 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; -- cgit v1.2.3 From b230812b9dda125e69ab0a5a11cda88d9c0d18a9 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 12:29:15 +0200 Subject: net: ethernet: mtk_wed: introduce partial AMSDU offload support for MT7988 Introduce partial AMSDU offload support for MT7988 SoC in order to merge in hw packets belonging to the same AMSDU before passing them to the WLAN nic. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 1 + include/linux/soc/mediatek/mtk_wed.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index db3d8429d50d..7e520c14eb8c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -919,6 +919,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 5b096f9f1975..90d9c9ead3bc 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -128,6 +128,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -146,10 +147,12 @@ struct mtk_wed_device { unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -225,6 +228,15 @@ static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) -- cgit v1.2.3 From 6757d345dd7dba795f5af44d4442d55a83c4b1b4 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 12:29:16 +0200 Subject: net: ethernet: mtk_wed: introduce hw_rro support for MT7988 MT7988 SoC support 802.11 receive reordering offload in hw while MT7986 SoC implements it through the firmware running on the mcu. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 90d9c9ead3bc..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -99,6 +100,9 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; @@ -119,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -137,6 +148,8 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; bool hw_rro; @@ -151,9 +164,20 @@ struct mtk_wed_device { u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; u8 amsdu_max_subframes; + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); @@ -192,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -263,6 +295,15 @@ static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -282,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif -- cgit v1.2.3 From 9ea9cb00a82b53ec39630eac718776d37e41b35a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Sep 2023 11:21:39 -0400 Subject: mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement Breno and Josef report a deadlock scenario from cgroup reclaim re-entering the filesystem: [ 361.546690] ====================================================== [ 361.559210] WARNING: possible circular locking dependency detected [ 361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S E [ 361.589704] ------------------------------------------------------ [ 361.602277] find/9315 is trying to acquire lock: [ 361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0 [ 361.631437] [ 361.631437] but task is already holding lock: [ 361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40 [ 362.904457] mutex_lock_nested+0x1c/0x30 [ 362.912414] __btrfs_release_delayed_node+0x68/0x4f0 [ 362.922460] btrfs_evict_inode+0x301/0x770 [ 362.982726] evict+0x17c/0x380 [ 362.988944] prune_icache_sb+0x100/0x1d0 [ 363.005559] super_cache_scan+0x1f8/0x260 [ 363.013695] do_shrink_slab+0x2a2/0x540 [ 363.021489] shrink_slab_memcg+0x237/0x3d0 [ 363.050606] shrink_slab+0xa7/0x240 [ 363.083382] shrink_node_memcgs+0x262/0x3b0 [ 363.091870] shrink_node+0x1a4/0x720 [ 363.099150] shrink_zones+0x1f6/0x5d0 [ 363.148798] do_try_to_free_pages+0x19b/0x5e0 [ 363.157633] try_to_free_mem_cgroup_pages+0x266/0x370 [ 363.190575] reclaim_high+0x16f/0x1f0 [ 363.208409] mem_cgroup_handle_over_high+0x10b/0x270 [ 363.246678] try_charge_memcg+0xaf2/0xc70 [ 363.304151] charge_memcg+0xf0/0x350 [ 363.320070] __mem_cgroup_charge+0x28/0x40 [ 363.328371] __filemap_add_folio+0x870/0xd50 [ 363.371303] filemap_add_folio+0xdd/0x310 [ 363.399696] __filemap_get_folio+0x2fc/0x7d0 [ 363.419086] pagecache_get_page+0xe/0x30 [ 363.427048] alloc_extent_buffer+0x1cd/0x6a0 [ 363.435704] read_tree_block+0x43/0xc0 [ 363.443316] read_block_for_search+0x361/0x510 [ 363.466690] btrfs_search_slot+0xc8c/0x1520 This is caused by the mem_cgroup_handle_over_high() not respecting the gfp_mask of the allocation context. We used to only call this function on resume to userspace, where no locks were held. But c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added a call from the allocation context without considering the gfp. Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Breno Leitao Reported-by: Josef Bacik Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: [5.17+] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- include/linux/resume_user_mode.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ab94ad4597d0..e4e24da16d2c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -920,7 +920,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1458,7 +1458,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index 285189454449..f8f3e958e9cf 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) } #endif - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(GFP_KERNEL); blkcg_maybe_throttle_current(); rseq_handle_notify_resume(NULL, regs); -- cgit v1.2.3 From b724a6418f1f853bcb39c8923bf14a50c7bdbd07 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 17 Sep 2023 23:38:46 +0800 Subject: bpf: Fix tr dereferencing Fix 'tr' dereferencing bug when CONFIG_BPF_JIT is turned off. When CONFIG_BPF_JIT is turned off, 'bpf_trampoline_get()' returns NULL, which is same as the cases when CONFIG_BPF_JIT is turned on. Closes: https://lore.kernel.org/r/202309131936.5Nc8eUD0-lkp@intel.com/ Fixes: f7b12b6fea00 ("bpf: verifier: refactor check_attach_btf_id()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Leon Hwang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230917153846.88732-1-hffilwlqm@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..49f8b691496c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) -- cgit v1.2.3 From 54e1f99d91405417b3ddb6050cfba82733c3aa41 Mon Sep 17 00:00:00 2001 From: Komal Bajaj Date: Wed, 30 Aug 2023 16:26:51 +0530 Subject: nvmem: core: Add stub for nvmem_cell_read_u8 Add the stub nvmem_cell_read_u8() function for drivers running with CONFIG_NVMEM disabled. Signed-off-by: Komal Bajaj Reviewed-by: Mukesh Ojha Link: https://lore.kernel.org/r/20230830105654.28057-4-quic_kbajaj@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/nvmem-consumer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 4523e4e83319..6ec4b9743e25 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -127,6 +127,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u8(struct device *dev, + const char *cell_id, u8 *val) +{ + return -EOPNOTSUPP; +} + static inline int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val) { -- cgit v1.2.3 From 0bc76be64e80b15b975345b6957a87a1893c34f2 Mon Sep 17 00:00:00 2001 From: Komal Bajaj Date: Wed, 30 Aug 2023 16:26:53 +0530 Subject: soc: qcom: llcc: Updating the macro name Update macro name for LLCC_DRE to LLCC_ECC as per the latest specification. Signed-off-by: Komal Bajaj Reviewed-by: Mukesh Ojha Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230830105654.28057-6-quic_kbajaj@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 93417ba1ead4..1a886666bbb6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -30,7 +30,7 @@ #define LLCC_NPU 23 #define LLCC_WLHW 24 #define LLCC_PIMEM 25 -#define LLCC_DRE 26 +#define LLCC_ECC 26 #define LLCC_CVP 28 #define LLCC_MODPE 29 #define LLCC_APTCM 30 -- cgit v1.2.3 From b64d143b752932ef483d0ed8d00958f1832dd6bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Sep 2023 16:28:23 +0800 Subject: crypto: hash - Hide CRYPTO_ALG_TYPE_AHASH_MASK Move the macro CRYPTO_ALG_TYPE_AHASH_MASK out of linux/crypto.h and into crypto/ahash.c so that it's not visible to users of the Crypto API. Also remove the unused CRYPTO_ALG_TYPE_HASH_MASK macro. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 31f6fee0c36c..a0780deb017a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -35,8 +35,6 @@ #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 -- cgit v1.2.3 From 31865c4c4db2b742fec6ccbff80483fa3e7ab9b9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Sep 2023 16:28:24 +0800 Subject: crypto: skcipher - Add lskcipher Add a new API type lskcipher designed for taking straight kernel pointers instead of SG lists. Its relationship to skcipher will be analogous to that between shash and ahash. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index a0780deb017a..f3c3a3b27fac 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_SIG 0x00000007 -- cgit v1.2.3 From 2a86f1b56a30e242caf7ee1268af68f4f49ce847 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: kasan: Cleanup the __HAVE_ARCH_SHADOW_MAP usage As Linus suggested, __HAVE_ARCH_XYZ is "stupid" and "having historical uses of it doesn't make it good". So migrate __HAVE_ARCH_SHADOW_MAP to separate macros named after the respective functions. Suggested-by: Linus Torvalds Reviewed-by: WANG Xuerui Reviewed-by: Andrey Konovalov Signed-off-by: Huacai Chen --- include/linux/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3df5499f7936..842623d708c2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,7 +54,7 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) -- cgit v1.2.3 From 653b7eb9d74426397c95061fd57da3063625af65 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Mon, 28 Aug 2023 14:20:00 +0300 Subject: net/mlx5: Bridge, Enable mcast in smfs steering mode In order to have mcast offloads the driver needs the following: It should know if that mcast comes from wire port, in addition the flow should not be marked as any specific source, that way it will give the flexibility for the driver not to be depended on the way iterator implemented in the FW. Signed-off-by: Erez Shitrit Reviewed-by: Moshe Shemesh Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1e00c2436377..6f7725238abc 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 -- cgit v1.2.3 From e0cc92fd945a9c8e43d4268cf1ea985d0e99a90f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 3 Aug 2023 16:39:40 +0300 Subject: net/mlx5: Add a health error syndrome for pci data poisoned Add new health error syndrome to indicate that pci data poisoned error has been received while fetching device ICM data. Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd8421d021cf..b23d8ff286a1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10574,6 +10574,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, }; struct mlx5_ifc_initial_seg_bits { -- cgit v1.2.3 From 6b596e62ed9f90c4a97e68ae1f7b1af5beeb3c05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Sep 2023 18:22:51 +0200 Subject: sched: Provide rt_mutex specific scheduler helpers With PREEMPT_RT there is a rt_mutex recursion problem where sched_submit_work() can use an rtlock (aka spinlock_t). More specifically what happens is: mutex_lock() /* really rt_mutex */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter // do PI chain walk rt_mutex_slowlock_block() schedule() sched_submit_work() ... spin_lock() /* really rtlock */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter *AGAIN* // *CONFUSION* Fix this by making rt_mutex do the sched_submit_work() early, before it enqueues itself as a waiter -- before it even knows *if* it will wait. [[ basically Thomas' patch but with different naming and a few asserts added ]] Originally-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de --- include/linux/sched.h | 3 +++ include/linux/sched/rt.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..67623ffd4a8e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -911,6 +911,9 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ -- cgit v1.2.3 From 6d2779ecaeb56f92d7105c56772346c71c88c278 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Sep 2023 18:14:29 +0100 Subject: locking/atomic: scripts: fix fallback ifdeffery Since commit: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") The ordering fallbacks for atomic*_read_acquire() and atomic*_set_release() erroneously fall back to the implictly relaxed atomic*_read() and atomic*_set() variants respectively, without any additional barriers. This loses the ACQUIRE and RELEASE ordering semantics, which can result in a wide variety of problems, even on strongly-ordered architectures where the implementation of atomic*_read() and/or atomic*_set() allows the compiler to reorder those relative to other accesses. In practice this has been observed to break bit spinlocks on arm64, resulting in dentry cache corruption. The fallback logic was intended to allow ACQUIRE/RELEASE/RELAXED ops to be defined in terms of FULL ops, but where an op had RELAXED ordering by default, this unintentionally permitted the ACQUIRE/RELEASE ops to be defined in terms of the implicitly RELAXED default. This patch corrects the logic to avoid falling back to implicitly RELAXED ops, resulting in the same behaviour as prior to commit 9257959a6e5b4fca. I've verified the resulting assembly on arm64 by generating outlined wrappers of the atomics. Prior to this patch the compiler generates sequences using relaxed load (LDR) and store (STR) instructions, e.g. | : | ldr x0, [x0] | ret | | : | str x1, [x0] | ret With this patch applied the compiler generates sequences using the intended load-acquire (LDAR) and store-release (STLR) instructions, e.g. | : | ldar x0, [x0] | ret | | : | stlr x1, [x0] | ret To make sure that there were no other victims of the ifdeffery rewrite, I generated outlined copies of all of the {atomic,atomic64,atomic_long} atomic operations before and after commit 9257959a6e5b4fca. A diff of the generated assembly on arm64 shows that only the read_acquire() and set_release() operations were changed, and only lost their intended ordering: | [mark@lakrids:~/src/linux]% diff -u \ | <(aarch64-linux-gnu-objdump -d before-9257959a6e5b4fca.o) | <(aarch64-linux-gnu-objdump -d after-9257959a6e5b4fca.o) | --- /proc/self/fd/11 2023-09-19 16:51:51.114779415 +0100 | +++ /proc/self/fd/16 2023-09-19 16:51:51.114779415 +0100 | @@ -1,5 +1,5 @@ | | -before-9257959a6e5b4fca.o: file format elf64-littleaarch64 | +after-9257959a6e5b4fca.o: file format elf64-littleaarch64 | | | Disassembly of section .text: | @@ -9,7 +9,7 @@ | 4: d65f03c0 ret | | 0000000000000008 : | - 8: 88dffc00 ldar w0, [x0] | + 8: b9400000 ldr w0, [x0] | c: d65f03c0 ret | | 0000000000000010 : | @@ -17,7 +17,7 @@ | 14: d65f03c0 ret | | 0000000000000018 : | - 18: 889ffc01 stlr w1, [x0] | + 18: b9000001 str w1, [x0] | 1c: d65f03c0 ret | | 0000000000000020 : | @@ -1230,7 +1230,7 @@ | 1070: d65f03c0 ret | | 0000000000001074 : | - 1074: c8dffc00 ldar x0, [x0] | + 1074: f9400000 ldr x0, [x0] | 1078: d65f03c0 ret | | 000000000000107c : | @@ -1238,7 +1238,7 @@ | 1080: d65f03c0 ret | | 0000000000001084 : | - 1084: c89ffc01 stlr x1, [x0] | + 1084: f9000001 str x1, [x0] | 1088: d65f03c0 ret | | 000000000000108c : | @@ -2427,7 +2427,7 @@ | 207c: d65f03c0 ret | | 0000000000002080 : | - 2080: c8dffc00 ldar x0, [x0] | + 2080: f9400000 ldr x0, [x0] | 2084: d65f03c0 ret | | 0000000000002088 : | @@ -2435,7 +2435,7 @@ | 208c: d65f03c0 ret | | 0000000000002090 : | - 2090: c89ffc01 stlr x1, [x0] | + 2090: f9000001 str x1, [x0] | 2094: d65f03c0 ret | | 0000000000002098 : I've build tested this with a variety of configs for alpha, arm, arm64, csky, i386, m68k, microblaze, mips, nios2, openrisc, powerpc, riscv, s390, sh, sparc, x86_64, and xtensa, for which I've seen no issues. I was unable to build test for ia64 and parisc due to existing build breakage in v6.6-rc2. Fixes: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") Reported-by: Ming Lei Reported-by: Darrick J. Wong Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Tested-by: Baokun Li Link: https://lkml.kernel.org/r/20230919171430.2697727-1-mark.rutland@arm.com --- include/linux/atomic/atomic-arch-fallback.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 18f5744dfb5d..b83ef19da13d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -459,8 +459,6 @@ raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); -#elif defined(arch_atomic_read) - return arch_atomic_read(v); #else int ret; @@ -508,8 +506,6 @@ raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); -#elif defined(arch_atomic_set) - arch_atomic_set(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); @@ -2575,8 +2571,6 @@ raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); -#elif defined(arch_atomic64_read) - return arch_atomic64_read(v); #else s64 ret; @@ -2624,8 +2618,6 @@ raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); -#elif defined(arch_atomic64_set) - arch_atomic64_set(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); @@ -4657,4 +4649,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d +// 2fdd6702823fa842f9cea57a002e6e4476ae780c -- cgit v1.2.3 From 03a95cf233b5bdd65ddd4eca63cd4874b6b84d50 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:03 +0100 Subject: firmware: arm_scmi: Simplify enable/disable clock operations SCMI clock enable/disable operations come in 2 different flavours which simply just differ in how the underlying SCMI transactions is carried on: atomic or not. Currently we expose such SCMI operations through 2 distinctly named wrappers, that, in turn, are wrapped into another couple of similarly and distinctly named callbacks inside SCMI clock driver user. Reduce the churn of duplicated wrappers by adding a param to SCMI clock enable/disable operations to ask for atomic operation while removing the _atomic version of such operations. No functional change. CC: Michael Turquette CC: Stephen Boyd CC: linux-clk@vger.kernel.org Signed-off-by: Cristian Marussi Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20230826125308.462328-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..b4c631a8d0ac 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -90,11 +90,10 @@ struct scmi_clk_proto_ops { u64 *rate); int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 rate); - int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable_atomic)(const struct scmi_protocol_handle *ph, - u32 clk_id); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); }; /** -- cgit v1.2.3 From 34592bf0a5cb0681ce3d64de5598951768f43328 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:05 +0100 Subject: firmware: arm_scmi: Add v3.2 clock CONFIG_GET support Add support for v3.2 clock CONFIG_GET command and related new clock protocol operation state_get() to retrieve the status of a clock. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230826125308.462328-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index b4c631a8d0ac..d11ca4286d57 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -80,6 +80,7 @@ struct scmi_protocol_handle; * @rate_set: set the clock rate of a clock * @enable: enables the specified clock * @disable: disables the specified clock + * @state_get: get the status of the specified clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -94,6 +95,8 @@ struct scmi_clk_proto_ops { bool atomic); int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, bool atomic); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool *enabled, bool atomic); }; /** -- cgit v1.2.3 From 141b4fa0362569138653cf0165d92d48576db3fa Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:08 +0100 Subject: firmware: arm_scmi: Add clock OEM config clock operations Expose a couple of new SCMI clock operations to get and set OEM specific clock configurations when talking to an SCMI v3.2 compliant. Issuing such requests against an SCMI platform server not supporting v3.2 extension for OEM specific clock configurations will fail. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230826125308.462328-7-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index d11ca4286d57..e09ac428fa1b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -81,6 +81,8 @@ struct scmi_protocol_handle; * @enable: enables the specified clock * @disable: disables the specified clock * @state_get: get the status of the specified clock + * @config_oem_get: get the value of an OEM specific clock config + * @config_oem_set: set the value of an OEM specific clock config */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -97,6 +99,11 @@ struct scmi_clk_proto_ops { bool atomic); int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, bool *enabled, bool atomic); + int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 *oem_val, u32 *attributes, + bool atomic); + int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 oem_val, bool atomic); }; /** -- cgit v1.2.3 From 647aa768281f38cb1002edb3a1f673c3d66a8d81 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:13 +0200 Subject: Revert "fs: add infrastructure for multigrain timestamps" This reverts commit ffb6cf19e06334062744b7e3493f71e500964f8e. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..b528f063e8ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,47 +1508,18 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -struct timespec64 current_mgtime(struct inode *inode); struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/* - * Multigrain timestamps - * - * Conditionally use fine-grained ctime and mtime timestamps when there - * are users actively observing them via getattr. The primary use-case - * for this is NFS clients that use the ctime to distinguish between - * different states of the file, and that are often fooled by multiple - * operations that occur in the same coarse-grained timer tick. - * - * The kernel always keeps normalized struct timespec64 values in the ctime, - * which means that only the first 30 bits of the value are used. Use the - * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value - * has been queried since it was last updated. - */ -#define I_CTIME_QUERIED (1L<<30) - /** * inode_get_ctime - fetch the current ctime from the inode * @inode: inode from which to fetch ctime * - * Grab the current ctime tv_nsec field from the inode, mask off the - * I_CTIME_QUERIED flag and return it. This is mostly intended for use by - * internal consumers of the ctime that aren't concerned with ensuring a - * fine-grained update on the next change (e.g. when preparing to store - * the value in the backing store for later retrieval). - * - * This is safe to call regardless of whether the underlying filesystem - * is using multigrain timestamps. + * Grab the current ctime from the inode and return it. */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - struct timespec64 ctime; - - ctime.tv_sec = inode->__i_ctime.tv_sec; - ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED; - - return ctime; + return inode->__i_ctime; } /** @@ -2334,7 +2305,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2358,17 +2328,6 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -/** - * is_mgtime: is this inode using multigrain timestamps - * @inode: inode to test for multigrain timestamps - * - * Return true if the inode uses multigrain timestamps, false otherwise. - */ -static inline bool is_mgtime(const struct inode *inode) -{ - return inode->i_sb->s_type->fs_flags & FS_MGTIME; -} - extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3054,7 +3013,6 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); -- cgit v1.2.3 From 41b43b6c6e30a832c790b010a06772e793bca193 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 20 Sep 2023 12:46:27 +0200 Subject: locking/seqlock: Do the lockdep annotation before locking in do_write_seqcount_begin_nested() It was brought up by Tetsuo that the following sequence: write_seqlock_irqsave() printk_deferred_enter() could lead to a deadlock if the lockdep annotation within write_seqlock_irqsave() triggers. The problem is that the sequence counter is incremented before the lockdep annotation is performed. The lockdep splat would then attempt to invoke printk() but the reader side, of the same seqcount, could have a tty_port::lock acquired waiting for the sequence number to become even again. The other lockdep annotations come before the actual locking because "we want to see the locking error before it happens". There is no reason why seqcount should be different here. Do the lockdep annotation first then perform the locking operation (the sequence increment). Fixes: 1ca7d67cf5d5a ("seqcount: Add lockdep functionality to seqcount/seqlock structures") Reported-by: Tetsuo Handa Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230920104627._DTHgPyA@linutronix.de Closes: https://lore.kernel.org/20230621130641.-5iueY1I@linutronix.de --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..e9bd2f65d7f4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -512,8 +512,8 @@ do { \ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + do_raw_write_seqcount_begin(s); } /** -- cgit v1.2.3 From 87c3a5893e865739ce78aa7192d36011022e0af7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 15 Sep 2023 15:47:11 +1000 Subject: sched/core: Optimize in_task() and in_interrupt() a bit Except on x86, preempt_count is always accessed with READ_ONCE(). Repeated invocations in macros like irq_count() produce repeated loads. These redundant instructions appear in various fast paths. In the one shown below, for example, irq_count() is evaluated during kernel entry if !tick_nohz_full_cpu(smp_processor_id()). 0001ed0a : 1ed0a: 4e56 0000 linkw %fp,#0 1ed0e: 200f movel %sp,%d0 1ed10: 0280 ffff e000 andil #-8192,%d0 1ed16: 2040 moveal %d0,%a0 1ed18: 2028 0008 movel %a0@(8),%d0 1ed1c: 0680 0001 0000 addil #65536,%d0 1ed22: 2140 0008 movel %d0,%a0@(8) 1ed26: 082a 0001 000f btst #1,%a2@(15) 1ed2c: 670c beqs 1ed3a 1ed2e: 2028 0008 movel %a0@(8),%d0 1ed32: 2028 0008 movel %a0@(8),%d0 1ed36: 2028 0008 movel %a0@(8),%d0 1ed3a: 4e5e unlk %fp 1ed3c: 4e75 rts This patch doesn't prevent the pointless btst and beqs instructions above, but it does eliminate 2 of the 3 pointless move instructions here and elsewhere. On x86, preempt_count is per-cpu data and the problem does not arise presumably because the compiler is free to optimize more effectively. This patch was tested on m68k and x86. I was expecting no changes to object code for x86 and mostly that's what I saw. However, there were a few places where code generation was perturbed for some reason. The performance issue addressed here is minor on uniprocessor m68k. I got a 0.01% improvement from this patch for a simple "find /sys -false" benchmark. For architectures and workloads susceptible to cache line bounce the improvement is expected to be larger. The only SMP architecture I have is x86, and as x86 unaffected I have not done any further measurements. Fixes: 15115830c887 ("preempt: Cleanup the macro maze a bit") Signed-off-by: Finn Thain Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/0a403120a682a525e6db2d81d1a3ffcc137c3742.1694756831.git.fthain@linux-m68k.org --- include/linux/preempt.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..9aa6358a1a16 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). + */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: -- cgit v1.2.3 From 3ba78da711940ce07c39c4cdd1f4ad284067a42d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 6 Jun 2021 13:27:15 +0200 Subject: sched/headers: Add header guard to It's the only non-trivial header in include/linux/sched/ missing a header guard. Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 7c83d4d5a971..df3aca89d4f5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); #endif /* CONFIG_SMP */ + +#endif /* _LINUX_SCHED_DEADLINE_H */ -- cgit v1.2.3 From 6eddb116dd830436afbd922568292867de6c8b9e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:24:17 +0200 Subject: sched/headers: Standardize the header guard name Use the same _LINUX_SCHED_ prefix nomenclature as the other 29 header guards in include/linux/sched/ do. Signed-off-by: Ingo Molnar --- include/linux/sched/vhost_task.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 837a23624a66..bc60243d43b3 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_VHOST_TASK_H -#define _LINUX_VHOST_TASK_H - +#ifndef _LINUX_SCHED_VHOST_TASK_H +#define _LINUX_SCHED_VHOST_TASK_H struct vhost_task; @@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); void vhost_task_wake(struct vhost_task *vtsk); -#endif +#endif /* _LINUX_SCHED_VHOST_TASK_H */ -- cgit v1.2.3 From 0f9a1a4d234c064d8dff69cf3f3755554dd479ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:27:37 +0200 Subject: sched/headers: Standardize the header guard #endif Signed-off-by: Ingo Molnar --- include/linux/sched/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 3c3e049224ae..969aaf5ef9d6 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -20,4 +20,4 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -#endif +#endif /* _LINUX_SCHED_TYPES_H */ -- cgit v1.2.3 From 1632d47fae2f2d229dd432854c4443ebb0bb27a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:28:48 +0200 Subject: sched/headers: Standardize the header guard #endif Signed-off-by: Ingo Molnar --- include/linux/sched/smt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 59d3736c454c..fb1e295e7e63 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; } void arch_smt_update(void); -#endif +#endif /* _LINUX_SCHED_SMT_H */ -- cgit v1.2.3 From 0df7cd3c13e44d01f9f28e29cbce74e2931b00fe Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:15 +0300 Subject: vsock/virtio/vhost: read data from non-linear skb This is preparation patch for MSG_ZEROCOPY support. It adds handling of non-linear skbs by replacing direct calls of 'memcpy_to_msg()' with 'skb_copy_datagram_iter()'. Main advantage of the second one is that it can handle paged part of the skb by using 'kmap()' on each page, but if there are no pages in the skb, it behaves like simple copying to iov iterator. This patch also adds new field to the control block of skb - this value shows current offset in the skb to read next portion of data (it doesn't matter linear it or not). Idea behind this field is that 'skb_copy_datagram_iter()' handles both types of skb internally - it just needs an offset from which to copy data from the given skb. This offset is incremented on each read from skb. This approach allows to simplify handling of both linear and non-linear skbs, because for linear skb we need to call 'skb_pull()' after reading data from it, while in non-linear case we need to update 'data_len'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- include/linux/virtio_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..a91fbdf233e4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) -- cgit v1.2.3 From 581512a6dc939ef122e49336626ae159f3b8a345 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:18 +0300 Subject: vsock/virtio: MSG_ZEROCOPY flag support This adds handling of MSG_ZEROCOPY flag on transmission path: 1) If this flag is set and zerocopy transmission is possible (enabled in socket options and transport allows zerocopy), then non-linear skb will be created and filled with the pages of user's buffer. Pages of user's buffer are locked in memory by 'get_user_pages()'. 2) Replaces way of skb owning: instead of 'skb_set_owner_sk_safe()' it calls 'skb_set_owner_w()'. Reason of this change is that '__zerocopy_sg_from_iter()' increments 'sk_wmem_alloc' of socket, so to decrease this field correctly, proper skb destructor is needed: 'sock_wfree()'. This destructor is set by 'skb_set_owner_w()'. 3) Adds new callback to 'struct virtio_transport': 'can_msgzerocopy'. If this callback is set, then transport needs extra check to be able to send provided number of buffers in zerocopy mode. Currently, the only transport that needs this callback set is virtio, because this transport adds new buffers to the virtio queue and we need to check, that number of these buffers is less than size of the queue (it is required by virtio spec). vhost and loopback transports don't need this check. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- include/linux/virtio_vsock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index a91fbdf233e4..ebb3ce63d64d 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -160,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t -- cgit v1.2.3 From e9090e70e618cd62ab7bf2914511e5eea31a2535 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:21 +0200 Subject: firmware: arm_scmi: Extend perf protocol ops to get number of domains Similar to other protocol ops, it's useful for an scmi module driver to get the number of supported performance domains, hence let's make this available by adding a new perf protocol callback. Note that, a user is being added from subsequent changes. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-2-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e09ac428fa1b..14791dd54706 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -110,6 +110,7 @@ struct scmi_clk_proto_ops { * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -129,6 +130,7 @@ struct scmi_clk_proto_ops { * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, -- cgit v1.2.3 From 3d99ed60721bf2e108c8fc660775766057689a92 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:22 +0200 Subject: firmware: arm_scmi: Extend perf protocol ops to get information of a domain Similar to other protocol ops, it's useful for an scmi module driver to get some generic information of a performance domain. Therefore, let's add a new callback to provide this information. The information is currently limited to the name of the performance domain and whether the set-level operation is supported, although this can easily be extended if we find the need for it. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-3-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 14791dd54706..9108fe8ed3e3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -106,11 +106,17 @@ struct scmi_clk_proto_ops { u8 oem_type, u32 oem_val, bool atomic); }; +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; +}; + /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -131,6 +137,8 @@ struct scmi_clk_proto_ops { */ struct scmi_perf_proto_ops { int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, -- cgit v1.2.3 From 39dfa5b9e1f0fa63b811a0a87f1c2fb9c76a0456 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:24 +0200 Subject: firmware: arm_scmi: Align perf ops to use domain-id as in-parameter Most scmi_perf_proto_ops are already using an "u32 domain" as an in-parameter to indicate what performance domain we shall operate upon. However, some of the ops are using a "struct device *dev", which means that an additional OF parsing is needed each time the perf ops gets called, to find the corresponding domain-id. To avoid the above, but also to make the code more consistent, let's replace the in-parameter "struct device *dev" with an "u32 domain". Note that, this requires us to make some corresponding changes to the scmi cpufreq driver, so let's do that too. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20230825112633.236607-5-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9108fe8ed3e3..4e63766adc6f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -149,9 +149,9 @@ struct scmi_perf_proto_ops { u32 *level, bool poll); int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); int (*device_opps_add)(const struct scmi_protocol_handle *ph, - struct device *dev); + struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long rate, bool poll); int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -159,7 +159,7 @@ struct scmi_perf_proto_ops { int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; -- cgit v1.2.3 From 9b578d83629e13f81a53d1695a4f700cdb10f772 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:25 +0200 Subject: firmware: arm_scmi: Drop redundant ->device_domain_id() from perf ops There are no longer any users of the ->device_domain_id() ops in the scmi_perf_proto_ops, therefore let's remove it. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-6-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 4e63766adc6f..27bfa5a65b45 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -121,7 +121,6 @@ struct scmi_perf_domain_info { * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain - * @device_domain_id: gets the scmi domain id for a given device * @transition_latency_get: gets the DVFS transition latency for a given device * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency @@ -147,7 +146,6 @@ struct scmi_perf_proto_ops { u32 level, bool poll); int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, u32 domain); int (*device_opps_add)(const struct scmi_protocol_handle *ph, -- cgit v1.2.3 From 3dd91515ef43dd43e32e2a84e4bd881b64fb33ae Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:32 +0200 Subject: PM: domains: Allow genpd providers to manage OPP tables directly by its FW In some cases the OPP tables aren't specified in device tree, but rather encoded in the FW. To allow a genpd provider to specify them dynamically instead, let's add a new genpd flag, GENPD_FLAG_OPP_TABLE_FW. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-13-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- include/linux/pm_domain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..05ad8cefdff1 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,6 +61,10 @@ * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its * components' next wakeup when determining the * optimal idle state. + * + * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, + * but its corresponding OPP tables are not + * described in DT, but are given directly by FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -69,6 +73,7 @@ #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) +#define GENPD_FLAG_OPP_TABLE_FW (1U << 7) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ -- cgit v1.2.3 From 9f6c532f59b20580acf8ede9409c9b8dce6e74e1 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:10 +0200 Subject: futex: Add sys_futex_wake() To complement sys_futex_waitv() add sys_futex_wake(). This syscall implements what was previously known as FUTEX_WAKE_BITSET except it uses 'unsigned long' for the bitmask and takes FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105247.936205525@noisy.programming.kicks-ass.net --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..e174ed86da1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -549,6 +549,9 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, -- cgit v1.2.3 From cb8c4312afca1b2dc64107e7e7cea81911055612 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:12 +0200 Subject: futex: Add sys_futex_wait() To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This syscall implements what was previously known as FUTEX_WAIT_BITSET except it uses 'unsigned long' for the value and bitmask arguments, takes timespec and clockid_t arguments for the absolute timeout and uses FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net --- include/linux/syscalls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e174ed86da1d..11f3fdd1ee03 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, -- cgit v1.2.3 From 0f4b5f972216782a4acb1ae00dcb55173847c2ff Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:15 +0200 Subject: futex: Add sys_futex_requeue() Finish off the 'simple' futex2 syscall group by adding sys_futex_requeue(). Unlike sys_futex_{wait,wake}() its arguments are too numerous to fit into a regular syscall. As such, use struct futex_waitv to pass the 'source' and 'destination' futexes to the syscall. This syscall implements what was previously known as FUTEX_CMP_REQUEUE and uses {val, uaddr, flags} for source and {uaddr, flags} for destination. This design explicitly allows requeueing between different types of futex by having a different flags word per uaddr. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.511860556@noisy.programming.kicks-ass.net --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 11f3fdd1ee03..0901af60d971 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -556,6 +556,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned l unsigned int flags, struct __kernel_timespec __user *timespec, clockid_t clockid); +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, -- cgit v1.2.3 From f31ecf671ddc498f20219453395794ff2383e06b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Jul 2023 16:14:37 -0600 Subject: io_uring: add IORING_OP_WAITID support This adds support for an async version of waitid(2), in a fully async version. If an event isn't immediately available, wait for a callback to trigger a retry. The format of the sqe is as follows: sqe->len The 'which', the idtype being queried/waited for. sqe->fd The 'pid' (or id) being waited for. sqe->file_index The 'options' being set. sqe->addr2 A pointer to siginfo_t, if any, being filled in. buf_index, add3, and waitid_flags are reserved/unused for now. waitid_flags will be used for options for this request type. One interesting use case may be to add multi-shot support, so that the request stays armed and posts a notification every time a monitored process state change occurs. Note that this does not support rusage, on Arnd's recommendation. See the waitid(2) man page for details on the arguments. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 13d19b9be9f4..fe1c5d4ec56c 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -313,6 +313,8 @@ struct io_ring_ctx { struct list_head cq_overflow_list; struct io_hash_table cancel_table; + struct hlist_head waitid_list; + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ -- cgit v1.2.3 From 41e845628511878d6e89e2a9249c095e72aab7eb Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 9 Sep 2023 21:19:32 +0200 Subject: cred: add get_cred_many and put_cred_many Some of the frequent consumers of get_cred and put_cred operate on 2 references on the same creds back-to-back. Switch them to doing the work in one go instead. Signed-off-by: Mateusz Guzik [PM: removed changelog from commit description] Signed-off-by: Paul Moore --- include/linux/cred.h | 59 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..56bc432fe49b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -218,6 +218,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } +/** + * get_new_cred_many - Get references on a new set of credentials + * @cred: The new credentials to reference + * @nr: Number of references to acquire + * + * Get references on the specified set of new credentials. The caller must + * release all acquired references. + */ +static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +{ + atomic_add(nr, &cred->usage); + return cred; +} + /** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference @@ -227,16 +241,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred_many(cred, 1); } /** - * get_cred - Get a reference on a set of credentials + * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference + * @nr: Number of references to acquire * - * Get a reference on the specified set of credentials. The caller must - * release the reference. If %NULL is passed, it is returned with no action. + * Get references on the specified set of credentials. The caller must release + * all acquired reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the @@ -244,14 +258,28 @@ static inline struct cred *get_new_cred(struct cred *cred) * accidental alteration of a set of credentials that should be considered * immutable. */ -static inline const struct cred *get_cred(const struct cred *cred) +static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; validate_creds(cred); nonconst_cred->non_rcu = 0; - return get_new_cred(nonconst_cred); + return get_new_cred_many(nonconst_cred, nr); +} + +/* + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. If %NULL is passed, it is returned with no action. + * + * This is used to deal with a committed set of credentials. + */ +static inline const struct cred *get_cred(const struct cred *cred) +{ + return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) @@ -269,6 +297,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release + * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. @@ -277,17 +306,29 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ -static inline void put_cred(const struct cred *_cred) +static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } +/* + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. If %NULL is passed, nothing is done. + */ +static inline void put_cred(const struct cred *cred) +{ + put_cred_many(cred, 1); +} + /** * current_cred - Access the current task's subjective credentials * -- cgit v1.2.3 From 8bf0cdfac7f8aa3fa6151b5c5f5eebdb44a64e89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:32:58 +0200 Subject: : Introduce the list_for_each_reverse() method The list_head counterpart of list_for_each_entry_reverse() was missing, add it to complete the list handling APIs in . [ This new API is also relied on by a WIP scheduler patch, so this variant is not a theoretical possibility only. ] Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org --- include/linux/list.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 164b4d0e9d2a..1837caedf723 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,6 +686,14 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) +/** + * list_for_each_reverse - iterate backwards over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_reverse(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + /** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3 From b8ec60e1186cdcfce41e7db4c827cb107e459002 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 19 Sep 2023 17:17:28 -0700 Subject: x86/speculation, objtool: Use absolute relocations for annotations .discard.retpoline_safe sections do not have the SHF_ALLOC flag. These sections referencing text sections' STT_SECTION symbols with PC-relative relocations like R_386_PC32 [0] is conceptually not suitable. Newer LLD will report warnings for REL relocations even for relocatable links [1]: ld.lld: warning: vmlinux.a(drivers/i2c/busses/i2c-i801.o):(.discard.retpoline_safe+0x120): has non-ABS relocation R_386_PC32 against symbol '' Switch to absolute relocations instead, which indicate link-time addresses. In a relocatable link, these addresses are also output section offsets, used by checks in tools/objtool/check.c. When linking vmlinux, these .discard.* sections will be discarded, therefore it is not a problem that R_X86_64_32 cannot represent a kernel address. Alternatively, we could set the SHF_ALLOC flag for .discard.* sections, but I think non-SHF_ALLOC for sections to be discarded makes more sense. Note: if we decide to never support REL architectures (e.g. arm, i386), we can utilize R_*_NONE relocations (.reloc ., BFD_RELOC_NONE, sym), making .discard.* sections zero-sized. That said, the section content waste is 4 bytes per entry, much smaller than sizeof(Elf{32,64}_Rel). [0] commit 1c0c1faf5692 ("objtool: Use relative pointers for annotations") [1] https://github.com/ClangBuiltLinux/linux/issues/1937 Signed-off-by: Fangrui Song Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20230920001728.1439947-1-maskray@google.com --- include/linux/objtool.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 03f82c2c2ebf..6f6da95fe7f9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -141,7 +141,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm -- cgit v1.2.3 From f1a9be986cedbef839062428e8e9b2bcc9c58af5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:12:00 -0700 Subject: mtd: Annotate struct lpddr_private with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct lpddr_private. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230915201159.never.112-kees@kernel.org --- include/linux/mtd/qinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 2e3f43788d48..0421f12156b5 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[]; + struct flchip chips[] __counted_by(numchips); }; /* qinfo_query_info structure contains request information for -- cgit v1.2.3 From 1442d628d05c7aad86e5754fa554f32edc3e77a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:12:06 -0700 Subject: mtd: cfi: Annotate struct cfi_private with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct cfi_private. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230915201206.never.107-kees@kernel.org --- include/linux/mtd/cfi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d88bb56c18e2..947410faf9e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -287,7 +287,7 @@ struct cfi_private { unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ + struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, -- cgit v1.2.3 From 48554df6bf2b1e83f70749bf4b4d7914f8b3c01d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 13 Sep 2023 15:16:13 +0000 Subject: blk-mq: remove RQF_MQ_INFLIGHT Since the previous patch change to only account active requests when we really allocate the driver tag, the RQF_MQ_INFLIGHT can be removed and no double account problem. 1. none elevator: flush request will use the first pending request's driver tag, won't double account. 2. other elevator: flush request will be accounted when allocate driver tag when issue, and will be unaccounted when it put the driver tag. Signed-off-by: Chengming Zhou Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230913151616.3164338-3-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 958ed7e89b30..1ab3081c82ed 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -32,8 +32,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ -- cgit v1.2.3 From 5f05285df691b1e82108eead7165feae238c95ef Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 19 Sep 2023 13:40:48 +0530 Subject: iio: hid-sensor-als: Add light color temperature support In most cases, ambient color sensors also support light color temperature. As a result, add support of light color temperature. Signed-off-by: Basavaraj Natikar Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20230919081054.2050714-4-Basavaraj.Natikar@amd.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 13b1e65fbdcc..8af4fb3e0254 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,7 @@ #define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 -- cgit v1.2.3 From ee3710f39f9d0ae5137a866138d005fe1ad18132 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 19 Sep 2023 13:40:52 +0530 Subject: iio: hid-sensor-als: Add light chromaticity support In most cases, ambient color sensors also support the x and y light colors, which represent the coordinates on the CIE 1931 chromaticity diagram. Thus, add light chromaticity x and y. Signed-off-by: Basavaraj Natikar Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20230919081054.2050714-8-Basavaraj.Natikar@amd.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 8af4fb3e0254..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -22,6 +22,9 @@ #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 #define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 -- cgit v1.2.3 From a741deac787f0d2d7068638c067db20af9e63752 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Jul 2023 13:57:03 -0700 Subject: torture: Make torture_hrtimeout_ns() take an hrtimer mode parameter The current torture-test sleeps are waiting for a duration, but there are situations where it is better to wait for an absolute time, for example, when ending a stutter interval. This commit therefore adds an hrtimer mode parameter to torture_hrtimeout_ns(). Why not also the other torture_hrtimeout_*() functions? The theory is that most absolute times will be in nanoseconds, especially not (say) jiffies. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/torture.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index bb466eec01e4..017f0f710815 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -81,7 +81,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); -- cgit v1.2.3 From 0cfecd7d754f2ef5d7e6b56ee656a8544ade920a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Jul 2023 13:10:34 -0700 Subject: torture: Move rcutorture_sched_setaffinity() out of rcutorture The rcutorture_sched_setaffinity() function is needed by locktorture, so move its declaration from rcu.h to torture.h and rename it to the more generic torture_sched_setaffinity() name. Please note that use of this function is still restricted to torture tests, and of those, currently only rcutorture and locktorture. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/torture.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 017f0f710815..c98d0c83d117 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -121,10 +121,15 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +/* Scheduler-related definitions. */ #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() __preempt_schedule() #else #define torture_preempt_schedule() do { } while (0) #endif +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +#endif + #endif /* __LINUX_TORTURE_H */ -- cgit v1.2.3 From 5970fbad1036d1015abe45651628b39b5bcb8a22 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:47 -0700 Subject: fscrypt: make it clearer that key_prefix is deprecated fscrypt_operations::key_prefix should not be set by any filesystems that aren't setting it already. This is already documented, but apparently it's not sufficiently clear, as both ceph and btrfs have tried to set it. Rename the field to legacy_key_prefix and improve the documentation to hopefully make it clearer. Link: https://lore.kernel.org/r/20230925055451.59499-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c895b12737a1..b0037566ce30 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -73,12 +73,16 @@ struct fscrypt_operations { unsigned int flags; /* - * If set, this is a filesystem-specific key description prefix that - * will be accepted for "logon" keys for v1 fscrypt policies, in - * addition to the generic prefix "fscrypt:". This functionality is - * deprecated, so new filesystems shouldn't set this field. + * This field exists only for backwards compatibility reasons and should + * only be set by the filesystems that are setting it already. It + * contains the filesystem-specific key description prefix that is + * accepted for "logon" keys for v1 fscrypt policies. This + * functionality is deprecated in favor of the generic prefix + * "fscrypt:", which itself is deprecated in favor of the filesystem + * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that + * are newly adding fscrypt support should not set this field. */ - const char *key_prefix; + const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. -- cgit v1.2.3 From 40e13e18168fd0f1a6ad10166f5042a21c47ab99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:48 -0700 Subject: fscrypt: make the bounce page pool opt-in instead of opt-out Replace FS_CFLG_OWN_PAGES with a bit flag 'needs_bounce_pages' which has the opposite meaning. I.e., filesystems now opt into the bounce page pool instead of opt out. Make fscrypt_alloc_bounce_page() check that the bounce page pool has been initialized. I believe the opt-in makes more sense, since nothing else in fscrypt_operations is opt-out, and these days filesystems can choose to use blk-crypto which doesn't need the fscrypt bounce page pool. Also, I happen to be planning to add two more flags, and I wanted to fix the "FS_CFLG_" name anyway as it wasn't prefixed with "FSCRYPT_". Link: https://lore.kernel.org/r/20230925055451.59499-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b0037566ce30..4505078e89b7 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -59,18 +59,20 @@ struct fscrypt_name { #ifdef CONFIG_FS_ENCRYPTION -/* - * If set, the fscrypt bounce page pool won't be allocated (unless another - * filesystem needs it). Set this if the filesystem always uses its own bounce - * pages for writes and therefore won't need the fscrypt bounce page pool. - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - /* Crypto operations for filesystems */ struct fscrypt_operations { - /* Set of optional flags; see above for allowed flags */ - unsigned int flags; + /* + * If set, then fs/crypto/ will allocate a global bounce page pool the + * first time an encryption key is set up for a file. The bounce page + * pool is required by the following functions: + * + * - fscrypt_encrypt_pagecache_blocks() + * - fscrypt_zeroout_range() for files not using inline crypto + * + * If the filesystem doesn't use those, it doesn't need to set this. + */ + unsigned int needs_bounce_pages : 1; /* * This field exists only for backwards compatibility reasons and should -- cgit v1.2.3 From 753a4d531bc518633ea88ac0ed02b25a16823d51 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 22 Sep 2023 22:55:16 +0200 Subject: ata: libata-sata: increase PMP SRST timeout to 10s On certain SATA controllers, softreset fails after wakeup from S2RAM with the message "softreset failed (1st FIS failed)", sometimes resulting in drives not being detected again. With the increased timeout, this issue is avoided. Instead, "softreset failed (device not ready)" is now logged 1-2 times; this later failure seems to cause fewer problems however, and the drives are detected reliably once they've spun up and the probe is retried. The issue was observed with the primary SATA controller of the QNAP TS-453B, which is an "Intel Corporation Celeron/Pentium Silver Processor SATA Controller [8086:31e3] (rev 06)" integrated in the Celeron J4125 CPU, and the following drives: - Seagate IronWolf ST12000VN0008 - Seagate IronWolf ST8000NE0004 The SATA controller seems to be more relevant to this issue than the drives, as the same drives are always detected reliably on the secondary SATA controller on the same board (an ASMedia 106x) without any "softreset failed" errors even without the increased timeout. Fixes: e7d3ef13d52a ("libata: change drive ready wait after hard reset to 5s") Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index bf4913f4d7ac..84aca8c44fa3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -259,7 +259,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that -- cgit v1.2.3 From b3239498353484fd6ddeb513df89c4628cd623d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:12 +0300 Subject: wifi: mac80211: use bandwidth indication element for CSA In CSA, parse the (EHT) bandwidth indication element and use it (in fact prefer it if present). Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.43ef01920556.If4f24a61cd634ab1e50eba43899b9e992bf25602@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 340d7e0f6bf7..f11b7022d9eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3139,6 +3139,28 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3596,6 +3618,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ -- cgit v1.2.3 From 62e9c64eedfeb697ba28081ccaac59a45f9a96e1 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 20 Sep 2023 21:25:24 +0300 Subject: wifi: mac80211: add support for parsing TID to Link mapping element Add the relevant definitions for TID to Link mapping element according to the P802.11be_D4.0. Signed-off-by: Ayala Beker Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.9ea9b0b4412a.I2281ab2c70e8b43a39032dc115db6a80f1f0b3f4@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f11b7022d9eb..f2965ff3d7c1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1246,6 +1246,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -3618,6 +3642,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; @@ -5155,6 +5180,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + + if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ -- cgit v1.2.3 From ef246a1480cc484cd2aeda75737cb0848616ddf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:26 +0300 Subject: wifi: mac80211: support antenna control in injection Support antenna control for injection by parsing the antenna radiotap field (which may be presented multiple times) and telling the driver about the resulting antenna bitmap. Of course there's no guarantee the driver will actually honour this, just like any other injection control. If misconfigured, i.e. the injected HT/VHT MCS needs more chains than antennas are configured, the bitmap is reset to zero, indicating no selection. For now this is only set up for two anntenas so we keep more free bits, but that can be trivially extended if any driver implements support for it that can deal with hardware with more antennas. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.f71001aa4da9.I00ccb762a806ea62bc3d728fa3a0d29f4f285eeb@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f2965ff3d7c1..3b02f038d509 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1705,6 +1705,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream -- cgit v1.2.3 From 823a0258912b1a4f627e455bee7e78d349c29c5b Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Mon, 25 Sep 2023 16:08:59 +0800 Subject: wifi: ieee80211: add UL-bandwidth definition of trigger frame Define UL-bandwidth values of trigger frame according to 802.11 std. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://lore.kernel.org/r/20230925080902.51449-2-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3b02f038d509..62b4469c6866 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -307,6 +307,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 +/* UL-bandwidth within common_info of trigger frame */ +#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 +#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 +#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 +#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 +#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; -- cgit v1.2.3 From df31b298477e65a01deff0af352be3a61524d930 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:34 -0300 Subject: iommu: Add iommu_ops->identity_domain This allows a driver to set a global static to an IDENTITY domain and the core code will automatically use it whenever an IDENTITY domain is requested. By making it always available it means the IDENTITY can be used in error handling paths to force the iommu driver into a known state. Devices implementing global static identity domains should avoid failing their attach_dev ops. To make global static domains simpler allow drivers to omit their free function and update the iommufd selftest. Convert rockchip to use the new mechanism. Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..d0920b2a9f1c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -260,6 +260,8 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -294,6 +296,7 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; }; /** -- cgit v1.2.3 From 1c68cbc64fe6ac01dc242ba562344303031a76fb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:35 -0300 Subject: iommu: Add IOMMU_DOMAIN_PLATFORM This is used when the iommu driver is taking control of the dma_ops, currently only on S390 and power spapr. It is designed to preserve the original ops->detach_dev() semantic that these S390 was built around. Provide an opaque domain type and a 'default_domain' ops value that allows the driver to trivially force any single domain as the default domain. Update iommufd selftest to use this instead of set_platform_dma_ops Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d0920b2a9f1c..a05480be05fd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +82,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,6 +94,7 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) struct iommu_domain { unsigned type; @@ -262,6 +266,9 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -297,6 +304,7 @@ struct iommu_ops { unsigned long pgsize_bitmap; struct module *owner; struct iommu_domain *identity_domain; + struct iommu_domain *default_domain; }; /** -- cgit v1.2.3 From 24b1d476167df3e30c7a53b67765bf3c787c5160 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:48 -0300 Subject: iommu: Remove ops->set_platform_dma_ops() All drivers are now using IDENTITY or PLATFORM domains for what this did, we can remove it now. It is no longer possible to attach to a NULL domain. Tested-by: Heiko Stuebner Tested-by: Niklas Schnelle Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/15-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a05480be05fd..511dfeea5272 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -243,9 +243,6 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -280,7 +277,6 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ -- cgit v1.2.3 From 4601cd2d7c4c82c4bafc822e1ff630a709eff206 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:55 -0300 Subject: iommu: Add ops->domain_alloc_paging() This callback requests the driver to create only a __IOMMU_DOMAIN_PAGING domain, so it saves a few lines in a lot of drivers needlessly checking the type. More critically, this allows us to sweep out all the IOMMU_DOMAIN_UNMANAGED and IOMMU_DOMAIN_DMA checks from a lot of the drivers, simplifying what is going on in the code and ultimately removing the now-unused special cases in drivers where they did not support IOMMU_DOMAIN_DMA. domain_alloc_paging() should return a struct iommu_domain that is functionally compatible with ARM_DMA_USE_IOMMU, dma-iommu.c and iommufd. Be forwards looking and pass in a 'struct device *' argument. We can provide this when allocating the default_domain. No drivers will look at this. Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/22-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 511dfeea5272..3f173307434d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -239,6 +239,8 @@ struct iommu_iotlb_gather { * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. * @domain_alloc: allocate iommu domain + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -273,6 +275,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); -- cgit v1.2.3 From e8f52d84cf0b6d1862ab62f7ed705f78690d11b2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 22 Aug 2023 13:15:57 -0300 Subject: iommu: Add generic_single_device_group() This implements the common pattern seen in drivers of a single iommu_group for the entire iommu driver instance. Implement this in core code so the drivers that want this can select it from their ops. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v1-c869a95191f2+5e8-iommu_single_grp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3f173307434d..5b693be3d35f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -378,6 +378,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -385,6 +386,7 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; @@ -648,6 +650,7 @@ extern struct iommu_group *pci_device_group(struct device *dev); extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data -- cgit v1.2.3 From 950210887670cbb7d2eb9af6fb743b70f1a1ebdc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 19 Sep 2023 20:54:37 +0200 Subject: thermal: core: Drop trips_disabled bitmask After recent changes, thermal_zone_get_trip() cannot fail, as invoked from thermal_zone_device_register_with_trips(), so the only role of the trips_disabled bitmask is struct thermal_zone_device is to make handle_thermal_trip() skip trip points whose temperature was initially zero. However, since the unit of temperature in the thermal core is millicelsius, zero may very well be a valid temperature value at least in some usage scenarios and the trip temperature may as well change later. Thus there is no reason to permanently disable trip points with initial temperature equal to zero. Accordingly, drop the trips_disabled bitmask along with the code related to it. Signed-off-by: Rafael J. Wysocki Tested-by: Ido Schimmel Acked-by: Daniel Lezcano --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a5ae4af955ff..6cfcae22ba12 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -122,7 +122,6 @@ struct thermal_cooling_device { * @devdata: private pointer for device private data * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports - * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when @@ -163,7 +162,6 @@ struct thermal_zone_device { void *devdata; struct thermal_trip *trips; int num_trips; - unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; int temperature; -- cgit v1.2.3 From bd111e987e762d82dc738232c6ed4b3c9bcc5c91 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 12 Sep 2023 17:18:43 +0100 Subject: iommu: Retire map/unmap ops With everyone now implementing the new interfaces, clean up the last remnants of the old map/unmap ops and simplify the calling logic again. Signed-off-by: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/d2afdf13b2fbf537713c3ec642dfd49d16dd9e6a.1694525662.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5b693be3d35f..64bd20142cbe 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -322,10 +322,8 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware @@ -344,13 +342,9 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); -- cgit v1.2.3 From cf08fa74c716cf20e5038d1e7dbbd7dba1b76062 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 13 Sep 2023 16:29:17 +0800 Subject: regulator: mt6358: Add output voltage fine tuning to fixed regulators The "fixed" LDO regulators found on the MT6358 and MT6366 PMICs have either no voltage selection register, or only one valid setting. However these do have a fine voltage calibration setting that can slightly boost the output voltage from 0 mV to 100 mV, in 10 mV increments. Add support for this by changing these into linear range regulators. Some register definitions that are missing are also added. Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Acked-by: Lee Jones Link: https://lore.kernel.org/r/20230913082919.1631287-3-wenst@chromium.org Signed-off-by: Mark Brown --- include/linux/mfd/mt6358/registers.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 3d33517f178c..5ea2590be710 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -262,6 +262,12 @@ #define MT6358_LDO_VBIF28_CON3 0x1db0 #define MT6358_VCAMA1_ANA_CON0 0x1e08 #define MT6358_VCAMA2_ANA_CON0 0x1e0c +#define MT6358_VFE28_ANA_CON0 0x1e10 +#define MT6358_VCN28_ANA_CON0 0x1e14 +#define MT6358_VBIF28_ANA_CON0 0x1e18 +#define MT6358_VAUD28_ANA_CON0 0x1e1c +#define MT6358_VAUX18_ANA_CON0 0x1e20 +#define MT6358_VXO22_ANA_CON0 0x1e24 #define MT6358_VCN33_ANA_CON0 0x1e28 #define MT6358_VSIM1_ANA_CON0 0x1e2c #define MT6358_VSIM2_ANA_CON0 0x1e30 -- cgit v1.2.3 From 581beb4fe37d17571c87058d13d298d5458e25e9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:02:58 +0100 Subject: iov_iter: Remove last_offset from iov_iter as it was for ITER_PIPE Now that ITER_PIPE has been removed, iov_iter::last_offset is no longer used, so remove it. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-2-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 42bce38a8e87..2000e42a6586 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -44,10 +44,7 @@ struct iov_iter { bool nofault; bool data_source; bool user_backed; - union { - size_t iov_offset; - int last_offset; - }; + size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type -- cgit v1.2.3 From 7d9e44a6ad8a22135e6308d330da29271f41a98a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:02 +0100 Subject: iov_iter: Renumber ITER_* constants Renumber the ITER_* iterator-type constants to put things in the same order as in the iteration functions and to group user-backed iterators at the bottom. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-6-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2000e42a6586..bef8e56aa45c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,12 +21,12 @@ struct kvec { enum iter_type { /* iter types */ + ITER_UBUF, ITER_IOVEC, - ITER_KVEC, ITER_BVEC, + ITER_KVEC, ITER_XARRAY, ITER_DISCARD, - ITER_UBUF, }; #define ITER_SOURCE 1 // == WRITE -- cgit v1.2.3 From f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:03 +0100 Subject: iov_iter: Derive user-backedness from the iterator type Use the iterator type to determine whether an iterator is user-backed or not rather than using a special flag for it. Now that ITER_UBUF and ITER_IOVEC are 0 and 1, they can be checked with a single comparison. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-7-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index bef8e56aa45c..65d9143f83c8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -43,7 +43,6 @@ struct iov_iter { bool copy_mc; bool nofault; bool data_source; - bool user_backed; size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so @@ -140,7 +139,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) static inline bool user_backed_iter(const struct iov_iter *i) { - return i->user_backed; + return iter_is_ubuf(i) || iter_is_iovec(i); } /* @@ -380,7 +379,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_UBUF, .copy_mc = false, - .user_backed = true, .data_source = direction, .ubuf = buf, .count = count, -- cgit v1.2.3 From f1982740f5e77090bde41a9b84e257d69ec46598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:04 +0100 Subject: iov_iter: Convert iterate*() to inline funcs Convert the iov_iter iteration macros to inline functions to make the code easier to follow. The functions are marked __always_inline as we don't want to end up with indirect calls in the code. This, however, leaves dealing with ->copy_mc in an awkard situation since the step function (memcpy_from_iter_mc()) needs to test the flag in the iterator, but isn't passed the iterator. This will be dealt with in a follow-up patch. The variable names in the per-type iterator functions have been harmonised as much as possible and made clearer as to the variable purpose. The iterator functions are also moved to a header file so that other operations that need to scan over an iterator can be added. For instance, the rbd driver could use this to scan a buffer to see if it is all zeros and libceph could use this to generate a crc. Signed-off-by: David Howells Link: https://lore.kernel.org/r/3710261.1691764329@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/855.1692047347@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/20230816120741.534415-1-dhowells@redhat.com/ # v3 Link: https://lore.kernel.org/r/20230925120309.1731676-8-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/iov_iter.h | 274 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 include/linux/iov_iter.h (limited to 'include/linux') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h new file mode 100644 index 000000000000..270454a6703d --- /dev/null +++ b/include/linux/iov_iter.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* I/O iterator iteration building functions. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_IOV_ITER_H +#define _LINUX_IOV_ITER_H + +#include +#include + +typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2); +typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, + void *priv, void *priv2); + +/* + * Handle ITER_UBUF. + */ +static __always_inline +size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + void __user *base = iter->ubuf; + size_t progress = 0, remain; + + remain = step(base + iter->iov_offset, 0, len, priv, priv2); + progress = len - remain; + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_IOVEC. + */ +static __always_inline +size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + const struct iovec *p = iter->__iov; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->__iov; + iter->__iov = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_KVEC. + */ +static __always_inline +size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct kvec *p = iter->kvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->kvec; + iter->kvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_BVEC. + */ +static __always_inline +size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct bio_vec *p = iter->bvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t offset = p->bv_offset + skip, part; + void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); + + part = min3(len, + (size_t)(p->bv_len - skip), + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); + remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); + kunmap_local(kaddr); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= p->bv_len) { + skip = 0; + p++; + } + if (remain) + break; + } while (len); + + iter->nr_segs -= p - iter->bvec; + iter->bvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_XARRAY. + */ +static __always_inline +size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + struct folio *folio; + size_t progress = 0; + loff_t start = iter->xarray_start + iter->iov_offset; + pgoff_t index = start / PAGE_SIZE; + XA_STATE(xas, iter->xarray, index); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t remain, consumed, offset, part, flen; + + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + offset = offset_in_folio(folio, start + progress); + flen = min(folio_size(folio) - offset, len); + + while (flen) { + void *base = kmap_local_folio(folio, offset); + + part = min_t(size_t, flen, + PAGE_SIZE - offset_in_page(offset)); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + + consumed = part - remain; + progress += consumed; + len -= consumed; + + if (remain || len == 0) + goto out; + flen -= consumed; + offset += consumed; + } + } + +out: + rcu_read_unlock(); + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_DISCARD. + */ +static __always_inline +size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + size_t progress = len; + + iter->count -= progress; + return progress; +} + +/** + * iterate_and_advance2 - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * Two step functions, @step and @ustep, must be provided, one for handling + * mapped kernel addresses and the other is given user addresses which have the + * potential to fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_ustep_f ustep, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + + if (likely(iter_is_ubuf(iter))) + return iterate_ubuf(iter, len, priv, priv2, ustep); + if (likely(iter_is_iovec(iter))) + return iterate_iovec(iter, len, priv, priv2, ustep); + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + +/** + * iterate_and_advance - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * As iterate_and_advance2(), but priv2 is always NULL. + */ +static __always_inline +size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, NULL, ustep, step); +} + +#endif /* _LINUX_IOV_ITER_H */ -- cgit v1.2.3 From 3acf8ace68230e9558cf916847f1cc9f208abdf1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:39 +0200 Subject: bpf: Add missed value to kprobe perf link info Add missed value to kprobe attached through perf link info to hold the stats of missed kprobe handler execution. The kprobe's missed counter gets incremented when kprobe handler is not executed due to another kprobe running on the same cpu. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230920213145.1941596-4-jolsa@kernel.org --- include/linux/trace_events.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS -- cgit v1.2.3 From dd8657894c11b03c6eb0fd53fe9d7fec2072d18b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:40 +0200 Subject: bpf: Count missed stats in trace_call_bpf Increase misses stats in case bpf array execution is skipped because of recursion check in trace_call_bpf. Adding bpf_prog_inc_misses_counters that increase misses counts for all bpf programs in bpf_prog_array. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Link: https://lore.kernel.org/bpf/20230920213145.1941596-5-jolsa@kernel.org --- include/linux/bpf.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30063a760b5a..a82efd34b741 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2922,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, -- cgit v1.2.3 From 7a0263dc904f3467f474e4088ae092eda9a5a99b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:50 -0700 Subject: fscrypt: replace get_ino_and_lblk_bits with just has_32bit_inodes Now that fs/crypto/ computes the filesystem's lblk_bits from its maximum file size, it is no longer necessary for filesystems to provide lblk_bits via fscrypt_operations::get_ino_and_lblk_bits. It is still necessary for fs/crypto/ to retrieve ino_bits from the filesystem. However, this is used only to decide whether inode numbers fit in 32 bits. Also, ino_bits is static for all relevant filesystems, i.e. it doesn't depend on the filesystem instance. Therefore, in the interest of keeping things as simple as possible, replace 'get_ino_and_lblk_bits' with a flag 'has_32bit_inodes'. This can always be changed back to a function if a filesystem needs it to be dynamic, but for now a static flag is all that's needed. Link: https://lore.kernel.org/r/20230925055451.59499-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4505078e89b7..09a3cacbf62a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -74,6 +74,17 @@ struct fscrypt_operations { */ unsigned int needs_bounce_pages : 1; + /* + * If set, then fs/crypto/ will allow the use of encryption settings + * that assume inode numbers fit in 32 bits (i.e. + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other + * prerequisites for these settings are also met. This is only useful + * if the filesystem wants to support inline encryption hardware that is + * limited to 32-bit or 64-bit data unit numbers and where programming + * keyslots is very slow. + */ + unsigned int has_32bit_inodes : 1; + /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. It @@ -151,21 +162,6 @@ struct fscrypt_operations { */ bool (*has_stable_inodes)(struct super_block *sb); - /* - * Get the number of bits that the filesystem uses to represent inode - * numbers and file logical block numbers. - * - * By default, both of these are assumed to be 64-bit. This function - * can be implemented to declare that either or both of these numbers is - * shorter, which may allow the use of the - * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of - * inline crypto hardware whose maximum DUN length is less than 64 bits - * (e.g., eMMC v5.2 spec compliant hardware). This function only needs - * to be implemented if support for one of these features is needed. - */ - void (*get_ino_and_lblk_bits)(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret); - /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem -- cgit v1.2.3 From 5b11888471806edf699316d4dcb9b426caebbef2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:51 -0700 Subject: fscrypt: support crypto data unit size less than filesystem block size Until now, fscrypt has always used the filesystem block size as the granularity of file contents encryption. Two scenarios have come up where a sub-block granularity of contents encryption would be useful: 1. Inline crypto hardware that only supports a crypto data unit size that is less than the filesystem block size. 2. Support for direct I/O at a granularity less than the filesystem block size, for example at the block device's logical block size in order to match the traditional direct I/O alignment requirement. (1) first came up with older eMMC inline crypto hardware that only supports a crypto data unit size of 512 bytes. That specific case ultimately went away because all systems with that hardware continued using out of tree code and never actually upgraded to the upstream inline crypto framework. But, now it's coming back in a new way: some current UFS controllers only support a data unit size of 4096 bytes, and there is a proposal to increase the filesystem block size to 16K. (2) was discussed as a "nice to have" feature, though not essential, when support for direct I/O on encrypted files was being upstreamed. Still, the fact that this feature has come up several times does suggest it would be wise to have available. Therefore, this patch implements it by using one of the reserved bytes in fscrypt_policy_v2 to allow users to select a sub-block data unit size. Supported data unit sizes are powers of 2 between 512 and the filesystem block size, inclusively. Support is implemented for both the FS-layer and inline crypto cases. This patch focuses on the basic support for sub-block data units. Some things are out of scope for this patch but may be addressed later: - Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64, in most cases. Unfortunately this combination usually causes data unit indices to exceed 32 bits, and thus fscrypt_supported_policy() correctly disallows it. The users who potentially need this combination are using f2fs. To support it, f2fs would need to provide an option to slightly reduce its max file size. - Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32. This has the same problem described above, but also it will need special code to make DUN wraparound still happen on a FS block boundary. - Supporting use case (2) mentioned above. The encrypted direct I/O code will need to stop requiring and assuming FS block alignment. This won't be hard, but it belongs in a separate patch. - Supporting this feature on filesystems other than ext4 and f2fs. (Filesystems declare support for it via their fscrypt_operations.) On UBIFS, sub-block data units don't make sense because UBIFS encrypts variable-length blocks as a result of compression. CephFS could support it, but a bit more work would be needed to make the fscrypt_*_block_inplace functions play nicely with sub-block data units. I don't think there's a use case for this on CephFS anyway. Link: https://lore.kernel.org/r/20230925055451.59499-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 09a3cacbf62a..b559e6f77707 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -85,6 +85,18 @@ struct fscrypt_operations { */ unsigned int has_32bit_inodes : 1; + /* + * If set, then fs/crypto/ will allow users to select a crypto data unit + * size that is less than the filesystem block size. This is done via + * the log2_data_unit_size field of the fscrypt policy. This flag is + * not compatible with filesystems that encrypt variable-length blocks + * (i.e. blocks that aren't all equal to filesystem's block size), for + * example as a result of compression. It's also not compatible with + * the fscrypt_encrypt_block_inplace() and + * fscrypt_decrypt_block_inplace() functions. + */ + unsigned int supports_subblock_data_units : 1; + /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. It -- cgit v1.2.3 From e850d9a52f4cd31521c80a7ea9718b69129af4d5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 12 Aug 2023 01:05:07 +0800 Subject: badblocks: add more helper structure and routines in badblocks.h This patch adds the following helper structure and routines into badblocks.h, - struct badblocks_context This structure is used in improved badblocks code for bad table iteration. - BB_END() The macro to calculate end LBA of a bad range record from bad table. - badblocks_full() and badblocks_empty() The inline routines to check whether bad table is full or empty. - set_changed() and clear_changed() The inline routines to set and clear 'changed' tag from struct badblocks. These new helper structure and routines can help to make the code more clear, they will be used in the improved badblocks code in following patches. Signed-off-by: Coly Li Reviewed-by: Xiao Ni Cc: Dan Williams Cc: Geliang Tang Cc: Hannes Reinecke Cc: Jens Axboe Cc: NeilBrown Cc: Vishal L Verma Acked-by: Geliang Tang Link: https://lore.kernel.org/r/20230811170513.2300-2-colyli@suse.de Signed-off-by: Jens Axboe --- include/linux/badblocks.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2426276b9bd3..670f2dae692f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -15,6 +15,7 @@ #define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) #define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) #define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x)) #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) /* Bad block numbers are stored sorted in a single page. @@ -41,6 +42,12 @@ struct badblocks { sector_t size; /* in sectors */ }; +struct badblocks_context { + sector_t start; + sector_t len; + int ack; +}; + int badblocks_check(struct badblocks *bb, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors); int badblocks_set(struct badblocks *bb, sector_t s, int sectors, @@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) } badblocks_exit(bb); } + +static inline int badblocks_full(struct badblocks *bb) +{ + return (bb->count >= MAX_BADBLOCKS); +} + +static inline int badblocks_empty(struct badblocks *bb) +{ + return (bb->count == 0); +} + +static inline void set_changed(struct badblocks *bb) +{ + if (bb->changed != 1) + bb->changed = 1; +} + +static inline void clear_changed(struct badblocks *bb) +{ + if (bb->changed != 0) + bb->changed = 0; +} + #endif -- cgit v1.2.3 From 948f0bf5ad6ac1e5f19f6aa8e7da4a950d66b661 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 20 Sep 2023 13:07:42 +0300 Subject: IB/mlx5: Add support for 800G_8X lane speed Add a check for 800G_8X speed when querying PTYS and report it back correctly when needed. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/26fd0b6e1fac071c3eb779657bb3d8ba47f47c4f.1695204156.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 98b2e1e149f9..794001ebd003 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -117,6 +117,7 @@ enum mlx5e_ext_link_mode { MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, MLX5E_400GAUI_8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, }; -- cgit v1.2.3 From b28ad32442bec2f0d9cb660d7d698a1a53c13d08 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 20 Sep 2023 13:07:43 +0300 Subject: IB/mlx5: Rename 400G_8X speed to comply to naming convention Rename 400G_8X speed to comply to naming convention. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/ac98447cac8379a43fbdb36d56e5fb2b741a97ff.1695204156.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 794001ebd003..26092c78a985 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,7 +115,7 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, - MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, -- cgit v1.2.3 From 9cf63f3a33e929f7eca36409914b8c12102b9984 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:54:37 -0700 Subject: platform/surface: aggregator: Annotate struct ssam_event with __counted_by MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ssam_event. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Maximilian Luz Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: "Gustavo A. R. Silva" Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20230922175436.work.031-kees@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/surface_aggregator/controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index cb7980805920..5b67f0f47d80 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -44,7 +44,7 @@ struct ssam_event { u8 command_id; u8 instance_id; u16 length; - u8 data[]; + u8 data[] __counted_by(length); }; /** -- cgit v1.2.3 From 8d74f1da776da9b0306630b13a3025214fa44618 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Fri, 22 Sep 2023 11:15:52 +0530 Subject: PM: sleep: Fix symbol export for _SIMPLE_ variants of _PM_OPS() Currently EXPORT_*_SIMPLE_DEV_PM_OPS() use EXPORT_*_DEV_PM_OPS() set of macros to export dev_pm_ops symbol, which export the symbol in case CONFIG_PM=y but don't take CONFIG_PM_SLEEP into consideration. Since _SIMPLE_ variants of _PM_OPS() do not include runtime PM handles and are only used in case CONFIG_PM_SLEEP=y, we should not be exporting dev_pm_ops symbol for them in case CONFIG_PM_SLEEP=n. This can be fixed by having two distinct set of export macros for both _RUNTIME_ and _SIMPLE_ variants of _PM_OPS(), such that the export of dev_pm_ops symbol used in each variant depends on CONFIG_PM and CONFIG_PM_SLEEP respectively. Introduce _DEV_SLEEP_PM_OPS() set of export macros for _SIMPLE_ variants of _PM_OPS(), which export dev_pm_ops symbol only in case CONFIG_PM_SLEEP=y and discard it otherwise. Fixes: 34e1ed189fab ("PM: Improve EXPORT_*_DEV_PM_OPS macros") Signed-off-by: Raag Jadav Reviewed-by: Paul Cercueil Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1400c37b29c7..629c1633bbd0 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ +#define _EXPORT_PM_OPS(name, license, ns) \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, license, ns); \ const struct dev_pm_ops name -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) -#else -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ + +#define _DISCARD_PM_OPS(name, license, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#else +#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) #define EXPORT_PM_FN_NS_GPL(name, ns) #endif -#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") -#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) -#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) +#ifdef CONFIG_PM_SLEEP +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#else +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) +#endif + +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) + +#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns) /* * Use this if you want to use the same suspend and resume callbacks for suspend @@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_DEV_PM_OPS(name) = { \ + EXPORT_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_GPL_DEV_PM_OPS(name) = { \ + EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } -- cgit v1.2.3 From 4d0e179a42879f7d76a5b95a2e7e7a5afa33954a Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:55 +1000 Subject: media: cros-ec-cec: Manage an array of ports To support multiple CEC ports, change cros_ec_cec to contain an array of ports, each with their own CEC adapter, etc. For now, only create a single port and use that port everywhere, so there is no functional change. Support for multiple ports will be added in the following patches. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ab721cf13a98..cb2ddd10a613 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4436,6 +4436,8 @@ struct ec_response_i2c_passthru_protect { * These commands are for sending and receiving message via HDMI CEC */ +#define EC_CEC_MAX_PORTS 16 + #define MAX_CEC_MSG_LEN 16 /* CEC message from the AP to be written on the CEC bus */ -- cgit v1.2.3 From e90bd1fe7cda1aa267fe683e392b4433ec2dc0d3 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:56 +1000 Subject: media: cros-ec-cec: Support multiple ports in set/get host commands Reuse the top four bits of the cmd field to specify the port number. The reason for doing this as opposed to adding a separate uint8_t field is it avoids the need to add new versions of these commands. The change is backwards compatible since these bits were previously always zero, so the default behaviour is to always operate on port 0. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index cb2ddd10a613..e8bb05db360f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4457,13 +4457,15 @@ struct ec_params_cec_write { /** * struct ec_params_cec_set - CEC parameters set * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to set the parameter on * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC * or 1 to enable CEC functionality, in case cmd is * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical * address between 0 and 15 or 0xff to unregister */ struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; uint8_t val; } __ec_align1; @@ -4473,9 +4475,11 @@ struct ec_params_cec_set { /** * struct ec_params_cec_get - CEC parameters get * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to get the parameter on */ struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; } __ec_align1; /** -- cgit v1.2.3 From adbfc747ddfb48c06d238640e16939916b7a4494 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:57 +1000 Subject: media: cros-ec-cec: Support multiple ports in write command Add a v1 of the CEC write command which contains a port parameter. Check which versions of the write command the EC supports and use the highest supported version. If it only supports v0, check that there is only one port. With v0, the EC will assume all write commands are for port 0. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e8bb05db360f..9a0c6e28f370 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4451,6 +4451,18 @@ struct ec_params_cec_write { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/** + * struct ec_params_cec_write_v1 - Message to write to the CEC bus + * @port: CEC port to write the message on + * @msg_len: length of msg in bytes + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write_v1 { + uint8_t port; + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA -- cgit v1.2.3 From 1cabf52639d16428bc0d61028dcaf38e29c5f3b5 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:58 +1000 Subject: media: cros-ec-cec: Support multiple ports in MKBP cec_events Use the top four bits of the cec_events MKBP event to store the port number. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 9a0c6e28f370..b7e8573a8a49 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4440,6 +4440,16 @@ struct ec_response_i2c_passthru_protect { #define MAX_CEC_MSG_LEN 16 +/* + * Helper macros for packing/unpacking cec_events. + * bits[27:0] : bitmask of events from enum mkbp_cec_event + * bits[31:28]: port number + */ +#define EC_MKBP_EVENT_CEC_PACK(events, port) \ + (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28)) +#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0)) +#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf) + /* CEC message from the AP to be written on the CEC bus */ #define EC_CMD_CEC_WRITE_MSG 0x00B8 -- cgit v1.2.3 From 425d20518c54bc6d66d733fb117a9a4046932d50 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:59 +1000 Subject: media: cros-ec-cec: Support receiving messages from multiple ports Currently, received messages are sent from the EC in the cec_message MKBP event. Since the size of ec_response_get_next_data_v1 is 16 bytes, which is also the maximum size of a CEC message, there is no space to add a port parameter. Increasing the size of ec_response_get_next_data_v1 is an option, but this would increase EC-kernel traffic for all MKBP event types. Instead, use an event to notify that data is ready, and add a new read command to read the data. For backwards compatibility with old EC firmware, continue to handle cec_message events as well. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index b7e8573a8a49..ad61c7ff0b28 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4473,6 +4473,27 @@ struct ec_params_cec_write_v1 { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/* CEC message read from a CEC bus reported back to the AP */ +#define EC_CMD_CEC_READ_MSG 0x00B9 + +/** + * struct ec_params_cec_read - Read a message from the CEC bus + * @port: CEC port to read a message on + */ +struct ec_params_cec_read { + uint8_t port; +} __ec_align1; + +/** + * struct ec_response_cec_read - Message read from the CEC bus + * @msg_len: length of msg in bytes + * @msg: message content read from the CEC bus + */ +struct ec_response_cec_read { + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA @@ -4529,6 +4550,8 @@ enum mkbp_cec_event { EC_MKBP_CEC_SEND_OK = BIT(0), /* Outgoing message was not acknowledged */ EC_MKBP_CEC_SEND_FAILED = BIT(1), + /* Incoming message can be read out by AP */ + EC_MKBP_CEC_HAVE_DATA = BIT(2), }; /*****************************************************************************/ -- cgit v1.2.3 From 5d227f02ceb9cc120cf04efbd77e12da182a5f62 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:44:01 +1000 Subject: media: cros-ec-cec: Get number of CEC ports from EC Add a new CEC port count host command and use it to query the number of CEC ports from the EC. If the host command is not supported then it must be old EC firmware which only supports one port, so fall back to assuming one port. This patch completes support for multiple ports in cros-ec-cec. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- include/linux/platform_data/cros_ec_commands.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ad61c7ff0b28..7dae17b62a4d 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4536,6 +4536,17 @@ struct ec_response_cec_get { uint8_t val; } __ec_align1; +/* Get the number of CEC ports */ +#define EC_CMD_CEC_PORT_COUNT 0x00C1 + +/** + * struct ec_response_cec_port_count - CEC port count response + * @port_count: number of CEC ports + */ +struct ec_response_cec_port_count { + uint8_t port_count; +} __ec_align1; + /* CEC parameters command */ enum cec_command { /* CEC reading, writing and events enable */ -- cgit v1.2.3 From 24775700eaa93ff83b2a0f1e005879cdf186cdd9 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Tue, 26 Sep 2023 05:19:32 +0000 Subject: x86/amd_nb: Add AMD Family MI300 PCI IDs Add new Root, Device 18h Function 3, and Function 4 PCI IDS for AMD F19h Model 90h-9fh (MI300A). Signed-off-by: Muralidhara M K Signed-off-by: Suma Hegde Signed-off-by: Ingo Molnar Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230926051932.193239-1-suma.hegde@amd.com --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5fb3d4c393a9..91b457de262e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -579,6 +579,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 2d5780bbef8dbe6375d481cbea212606a80e4453 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 26 Sep 2023 20:55:56 +0200 Subject: swiotlb: fix the check whether a device has used software IO TLB When CONFIG_SWIOTLB_DYNAMIC=y, devices which do not use the software IO TLB can avoid swiotlb lookup. A flag is added by commit 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it"), the flag is correctly set, but it is then never checked. Add the actual check here. Note that this code is an alternative to the default pool check, not an additional check, because: 1. swiotlb_find_pool() also searches the default pool; 2. if dma_uses_io_tlb is false, the default swiotlb pool is not used. Tested in a KVM guest against a QEMU RAM-backed SATA disk over virtio and *not* using software IO TLB, this patch increases IOPS by approx 2% for 4-way parallel I/O. The write memory barrier in swiotlb_dyn_alloc() is not needed, because a newly allocated pool must always be observed by swiotlb_find_slots() before an address from that pool is passed to is_swiotlb_buffer(). Correctness was verified using the following litmus test: C swiotlb-new-pool (* * Result: Never * * Check that a newly allocated pool is always visible when the * corresponding swiotlb buffer is visible. *) { mem_pools = default; } P0(int **mem_pools, int *pool) { /* add_mem_pool() */ WRITE_ONCE(*pool, 999); rcu_assign_pointer(*mem_pools, pool); } P1(int **mem_pools, int *flag, int *buf) { /* swiotlb_find_slots() */ int *r0; int r1; rcu_read_lock(); r0 = READ_ONCE(*mem_pools); r1 = READ_ONCE(*r0); rcu_read_unlock(); if (r1) { WRITE_ONCE(*flag, 1); smp_mb(); } /* device driver (presumed) */ WRITE_ONCE(*buf, r1); } P2(int **mem_pools, int *flag, int *buf) { /* device driver (presumed) */ int r0 = READ_ONCE(*buf); /* is_swiotlb_buffer() */ int r1; int *r2; int r3; smp_rmb(); r1 = READ_ONCE(*flag); if (r1) { /* swiotlb_find_pool() */ rcu_read_lock(); r2 = READ_ONCE(*mem_pools); r3 = READ_ONCE(*r2); rcu_read_unlock(); } } exists (2:r0<>0 /\ 2:r3=0) (* Not found. *) Fixes: 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it") Reported-by: Jonathan Corbet Closes: https://lore.kernel.org/linux-iommu/87a5uz3ob8.fsf@meer.lwn.net/ Signed-off-by: Petr Tesarik Reviewed-by: Catalin Marinas Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b4536626f8ff..ecde0312dd52 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,14 +172,23 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { - /* Pairs with smp_wmb() in swiotlb_find_slots() and - * swiotlb_dyn_alloc(), which modify the RCU lists. - */ - smp_rmb(); - return swiotlb_find_pool(dev, paddr); - } +#ifdef CONFIG_SWIOTLB_DYNAMIC + /* + * All SWIOTLB buffer addresses must have been returned by + * swiotlb_tbl_map_single() and passed to a device driver. + * If a SWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in swiotlb_find_slots(). + */ + smp_rmb(); + return READ_ONCE(dev->dma_uses_io_tlb) && + swiotlb_find_pool(dev, paddr); +#else return paddr >= mem->defpool.start && paddr < mem->defpool.end; +#endif } static inline bool is_swiotlb_force_bounce(struct device *dev) -- cgit v1.2.3 From 2e2b547950bc09e75afe912f9683be39c2195d9d Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Tue, 19 Sep 2023 15:47:06 +0800 Subject: mmc: core: Allow dynamical updates of the number of requests for hsq To allow dynamical updates of the current number of used in-flight requests, let's move away from using a hard-coded value to a use a corresponding variable in the struct mmc_host. This can be valuable when optimizing for certain I/O request sequences, as shown by subsequent changes. Signed-off-by: Wenchao Chen Link: https://lore.kernel.org/r/20230919074707.25517-2-wenchao.chen@unisoc.com [Ulf: Re-wrote the commitmsg to clarify the change] Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 62a6847a3b6f..2f445c651742 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -526,6 +526,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + int hsq_depth; u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; -- cgit v1.2.3 From 1a6a464774947920dcedcf7409be62495c7cedd0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:06 +0200 Subject: timers: Tag (hr)timer softirq as hotplug safe Specific stress involving frequent CPU-hotplug operations, such as running rcutorture for example, may trigger the following message: NOHZ tick-stop error: local softirq work is pending, handler #02!!!" This happens in the CPU-down hotplug process, after CPUHP_AP_SMPBOOT_THREADS whose teardown callback parks ksoftirqd, and before the target CPU shuts down through CPUHP_AP_IDLE_DEAD. In this fragile intermediate state, softirqs waiting for threaded handling may be forever ignored and eventually reported by the idle task as in the above example. However some vectors are known to be safe as long as the corresponding subsystems have teardown callbacks handling the migration of their events. The above error message reports pending timers softirq although this vector can be considered as hotplug safe because the CPUHP_TIMERS_PREPARE teardown callback performs the necessary migration of timers after the death of the CPU. Hrtimers also have a similar hotplug handling. Therefore this error message, as far as (hr-)timers are concerned, can be considered spurious and the relevant softirq vectors can be marked as hotplug safe. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230912104406.312185-6-frederic@kernel.org --- include/linux/interrupt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..4a1dc88ddbff 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -569,8 +569,12 @@ enum * 2) rcu_report_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + * + * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ + BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. -- cgit v1.2.3 From c02a427f7b64ed5b840a0720a6cee5a17a1e7e07 Mon Sep 17 00:00:00 2001 From: Xueshi Hu Date: Tue, 12 Sep 2023 12:44:05 +0200 Subject: tick/nohz: Remove unused tick_nohz_idle_stop_tick_protected() All the caller has been removed since commit 336f560a8917 ("x86/xen: don't let xen_pv_play_dead() return") Signed-off-by: Xueshi Hu Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230912104406.312185-5-frederic@kernel.org --- include/linux/tick.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..716d17f31c45 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -140,14 +140,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,8 +162,6 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 6260ecd04594360ae2af104fb2641317728a66e4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:27 -0700 Subject: irqdomain: Annotate struct irq_domain with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct irq_domain. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922175127.work.214-kees@kernel.org --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51c254b7fec2..ee0a82c60508 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -174,7 +174,7 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct irq_data __rcu *revmap[]; + struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ -- cgit v1.2.3 From d069ed6b752f91cea6341a9c60be42837678a7f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Sep 2023 20:01:43 +0200 Subject: thermal: core: Allow trip pointers to be used for cooling device binding Add new helper functions, thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), to allow a trip pointer to be used for binding a cooling device to a trip point and unbinding it, respectively, and redefine the existing helpers, thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), as wrappers around the new ones, respectively. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano --- include/linux/thermal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6cfcae22ba12..6710a4ace992 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -320,10 +320,18 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, -- cgit v1.2.3 From 5aa4c9608d2d5fea29e211a80c29696f7d94e9f7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 11 Sep 2023 12:38:48 +0300 Subject: net/mlx5: Introduce ifc bits for migration in a chunk mode Introduce ifc related stuff to enable migration in a chunk mode. Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20230911093856.81910-2-yishaih@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..3265bfcb3156 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1948,7 +1948,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; u8 reserved_at_e0[0xc0]; @@ -12392,7 +12394,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12408,7 +12411,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12440,7 +12447,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { -- cgit v1.2.3 From fb99ef17865035a6657786d4b2af11a27ba23f9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Aug 2023 15:41:14 +0900 Subject: ata: libata-scsi: link ata port and scsi device There is no direct device ancestry defined between an ata_device and its scsi device which prevents the power management code from correctly ordering suspend and resume operations. Create such ancestry with the ata device as the parent to ensure that the scsi device (child) is suspended before the ata device and that resume handles the ata device before the scsi device. The parent-child (supplier-consumer) relationship is established between the ata_port (parent) and the scsi device (child) with the function device_add_link(). The parent used is not the ata_device as the PM operations are defined per port and the status of all devices connected through that port is controlled from the port operations. The device link is established with the new function ata_scsi_slave_alloc(), and this function is used to define the ->slave_alloc callback of the scsi host template of all ata drivers. Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: John Garry --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 84aca8c44fa3..3ce1ab408114 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1148,6 +1148,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); +extern int ata_scsi_slave_alloc(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, @@ -1396,6 +1397,7 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ + .slave_alloc = ata_scsi_slave_alloc, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ -- cgit v1.2.3 From aa3998dbeb3abce63653b7f6d4542e7dcd022590 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Aug 2023 09:43:39 +0900 Subject: ata: libata-scsi: Disable scsi device manage_system_start_stop The introduction of a device link to create a consumer/supplier relationship between the scsi device of an ATA device and the ATA port of that ATA device fixes the ordering of system suspend and resume operations. For suspend, the scsi device is suspended first and the ata port after it. This is fine as this allows the synchronize cache and START STOP UNIT commands issued by the scsi disk driver to be executed before the ata port is disabled. For resume operations, the ata port is resumed first, followed by the scsi device. This allows having the request queue of the scsi device to be unfrozen after the ata port resume is scheduled in EH, thus avoiding to see new requests prematurely issued to the ATA device. Since libata sets manage_system_start_stop to 1, the scsi disk resume operation also results in issuing a START STOP UNIT command to the device being resumed so that the device exits standby power mode. However, restoring the ATA device to the active power mode must be synchronized with libata EH processing of the port resume operation to avoid either 1) seeing the start stop unit command being received too early when the port is not yet resumed and ready to accept commands, or after the port resume process issues commands such as IDENTIFY to revalidate the device. In this last case, the risk is that the device revalidation fails with timeout errors as the drive is still spun down. Commit 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") disabled issuing the START STOP UNIT command to avoid issues with it. But this is incorrect as transitioning a device to the active power mode from the standby power mode set on suspend requires a media access command. The IDENTIFY, READ LOG and SET FEATURES commands executed in libata EH context triggered by the ata port resume operation may thus fail. Fix these synchronization issues is by handling a device power mode transitions for system suspend and resume directly in libata EH context, without relying on the scsi disk driver management triggered with the manage_system_start_stop flag. To do this, the following libata helper functions are introduced: 1) ata_dev_power_set_standby(): This function issues a STANDBY IMMEDIATE command to transitiom a device to the standby power mode. For HDDs, this spins down the disks. This function applies only to ATA and ZAC devices and does nothing otherwise. This function also does nothing for devices that have the ATA_FLAG_NO_POWEROFF_SPINDOWN or ATA_FLAG_NO_HIBERNATE_SPINDOWN flag set. For suspend, call ata_dev_power_set_standby() in ata_eh_handle_port_suspend() before the port is disabled and frozen. ata_eh_unload() is also modified to transition all enabled devices to the standby power mode when the system is shutdown or devices removed. 2) ata_dev_power_set_active() and This function applies to ATA or ZAC devices and issues a VERIFY command for 1 sector at LBA 0 to transition the device to the active power mode. For HDDs, since this function will complete only once the disk spin up. Its execution uses the same timeouts as for reset, to give the drive enough time to complete spinup without triggering a command timeout. For resume, call ata_dev_power_set_active() in ata_eh_revalidate_and_attach() after the port has been enabled and before any other command is issued to the device. With these changes, the manage_system_start_stop and no_start_on_resume scsi device flags do not need to be set in ata_scsi_dev_config(). The flag manage_runtime_start_stop is still set to allow the sd driver to spinup/spindown a disk through the sd runtime operations. Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- include/linux/libata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3ce1ab408114..2a7d2af0ed80 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -192,6 +192,7 @@ enum { ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ + ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ @@ -318,9 +319,10 @@ enum { ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ + ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | - ATA_EH_GET_SUCCESS_SENSE, + ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -357,7 +359,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ -- cgit v1.2.3 From 528ce6781726e022bc5dc84034360e6e8f1b89bd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Sep 2023 20:43:24 +0800 Subject: io_uring: retain top 8bits of uring_cmd flags for kernel internal use Retain top 8bits of uring_cmd flags for kernel internal use, so that we can move IORING_URING_CMD_POLLED out of uapi header. Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Anuj Gupta Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 106cdc55ff3b..ae08d6f66e62 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -22,6 +22,9 @@ enum io_uring_cmd_flags { IO_URING_F_IOPOLL = (1 << 10), }; +/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_POLLED (1U << 31) + struct io_uring_cmd { struct file *file; const struct io_uring_sqe *sqe; -- cgit v1.2.3 From 93b8cc60c37b9d17732b7a297e5dca29b50a990d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Sep 2023 20:43:25 +0800 Subject: io_uring: cancelable uring_cmd uring_cmd may never complete, such as ublk, in which uring cmd isn't completed until one new block request is coming from ublk block device. Add cancelable uring_cmd to provide mechanism to driver for cancelling pending commands in its own way. Add API of io_uring_cmd_mark_cancelable() for driver to mark one command as cancelable, then io_uring will cancel this command in io_uring_cancel_generic(). ->uring_cmd() callback is reused for canceling command in driver's way, then driver gets notified with the cancelling from io_uring. Add API of io_uring_cmd_get_task() to help driver cancel handler deal with the canceling. Reviewed-by: Gabriel Krisman Bertazi Suggested-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 15 +++++++++++++++ include/linux/io_uring_types.h | 6 ++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index ae08d6f66e62..b4391e0a9bc8 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -20,9 +20,13 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = (1 << 8), IO_URING_F_CQE32 = (1 << 9), IO_URING_F_IOPOLL = (1 << 10), + + /* set when uring wants to cancel a previously issued command */ + IO_URING_F_CANCEL = (1 << 11), }; /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_CANCELABLE (1U << 30) #define IORING_URING_CMD_POLLED (1U << 31) struct io_uring_cmd { @@ -85,6 +89,9 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags); +struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd); #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -125,6 +132,14 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, { return -EOPNOTSUPP; } +static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ +} +static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return NULL; +} #endif #endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fe1c5d4ec56c..e178461fa513 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -265,6 +265,12 @@ struct io_ring_ctx { */ struct io_wq_work_list iopoll_list; bool poll_multi_queue; + + /* + * Any cancelable uring_cmd is added to this list in + * ->uring_cmd() by io_uring_cmd_insert_cancelable() + */ + struct hlist_head cancelable_uring_cmd; } ____cacheline_aligned_in_smp; struct { -- cgit v1.2.3 From a941b784b15ff65e1a3b6a259c6d6cf7fa0bb3c3 Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Fri, 15 Sep 2023 10:24:21 +0530 Subject: cdx: add support for bus mastering Introduce cdx_set_master() and cdx_clear_master() APIs to support enable and disable of bus mastering. Drivers need to use these APIs to enable/disable DMAs from the CDX devices. Signed-off-by: Nipun Gupta Reviewed-by: Pieter Jansen van Vuuren Link: https://lore.kernel.org/r/20230915045423.31630-1-nipun.gupta@amd.com Signed-off-by: Alex Williamson --- include/linux/cdx/cdx_bus.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index bead71b7bc73..8320ec3b9e37 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -21,11 +21,13 @@ struct cdx_controller; enum { + CDX_DEV_BUS_MASTER_CONF, CDX_DEV_RESET_CONF, }; struct cdx_device_config { u8 type; + bool bus_master_enable; }; typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); @@ -170,4 +172,20 @@ extern struct bus_type cdx_bus_type; */ int cdx_dev_reset(struct device *dev); +/** + * cdx_set_master - enables bus-mastering for CDX device + * @cdx_dev: the CDX device to enable + * + * Return: 0 for success, -errno on failure + */ +int cdx_set_master(struct cdx_device *cdx_dev); + +/** + * cdx_clear_master - disables bus-mastering for CDX device + * @cdx_dev: the CDX device to disable + * + * Return: 0 for success, -errno on failure + */ +int cdx_clear_master(struct cdx_device *cdx_dev); + #endif /* _CDX_BUS_H_ */ -- cgit v1.2.3 From d427da2323b093a65d8317783e76ab8fad2e2ef0 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Fri, 25 Aug 2023 14:27:10 +0800 Subject: PCI: Add pci_get_base_class() helper There is no function to get all PCI devices in a system by matching against the base class code only, ignoring the sub-class code and the programming interface. Add pci_get_base_class() to suit the need. For example, if a driver wants to process all PCI display devices in a system, it can do so like this: pdev = NULL; while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { do_something_for_pci_display_device(pdev); } Link: https://lore.kernel.org/r/20230825062714.6325-2-sui.jingfeng@linux.dev Signed-off-by: Sui Jingfeng [bhelgaas: reword commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Deucher --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c7c2c3c6c65..40ac1288a2cc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1181,6 +1181,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from); + int pci_dev_present(const struct pci_device_id *ids); int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn, @@ -1924,6 +1926,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } -- cgit v1.2.3 From 194bb58c6090e39bd7d9b9c888a079213628e1f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Jun 2023 11:57:40 -0600 Subject: io_uring: add support for futex wake and wait Add support for FUTEX_WAKE/WAIT primitives. IORING_OP_FUTEX_WAKE is mix of FUTEX_WAKE and FUTEX_WAKE_BITSET, as it does support passing in a bitset. Similary, IORING_OP_FUTEX_WAIT is a mix of FUTEX_WAIT and FUTEX_WAIT_BITSET. For both of them, they are using the futex2 interface. FUTEX_WAKE is straight forward, as those can always be done directly from the io_uring submission without needing async handling. For FUTEX_WAIT, things are a bit more complicated. If the futex isn't ready, then we rely on a callback via futex_queue->wake() when someone wakes up the futex. From that calback, we queue up task_work with the original task, which will post a CQE and wake it, if necessary. Cancelations are supported, both from the application point-of-view, but also to be able to cancel pending waits if the ring exits before all events have occurred. The return value of futex_unqueue() is used to gate who wins the potential race between cancelation and futex wakeups. Whomever gets a 'ret == 1' return from that claims ownership of the io_uring futex request. This is just the barebones wait/wake support. PI or REQUEUE support is not added at this point, unclear if we might look into that later. Likewise, explicit timeouts are not supported either. It is expected that users that need timeouts would do so via the usual io_uring mechanism to do that using linked timeouts. The SQE format is as follows: `addr` Address of futex `fd` futex2(2) FUTEX2_* flags `futex_flags` io_uring specific command flags. None valid now. `addr2` Value of futex `addr3` Mask to wake/wait Acked-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e178461fa513..990984614fca 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -321,6 +321,11 @@ struct io_ring_ctx { struct hlist_head waitid_list; +#ifdef CONFIG_FUTEX + struct hlist_head futex_list; + struct io_alloc_cache futex_cache; +#endif + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ -- cgit v1.2.3 From d77008421afda6208b1256c9b218457acd174ca6 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 17 Aug 2023 21:14:57 -0700 Subject: groups: Convert group_info.usage to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable group_info.usage is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in refcount.h have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the group_info.usage it might make a difference in following places: - put_group_info(): decrement in refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE ordering on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Link: https://lore.kernel.org/r/20230818041456.gonna.009-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/cred.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..92f8d772da6f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,7 @@ struct inode; * COW Supplementary groups list */ struct group_info { - atomic_t usage; + refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; @@ -39,7 +40,7 @@ struct group_info { */ static inline struct group_info *get_group_info(struct group_info *gi) { - atomic_inc(&gi->usage); + refcount_inc(&gi->usage); return gi; } @@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi) */ #define put_group_info(group_info) \ do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ + if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) -- cgit v1.2.3 From ce60f27bb62dfeb1bf827350520f34abc84e0933 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 20 Sep 2023 05:09:58 +0100 Subject: mm: abstract moving to the next PFN In order to fix the L1TF vulnerability, x86 can invert the PTE bits for PROT_NONE VMAs, which means we cannot move from one PTE to the next by adding 1 to the PFN field of the PTE. This results in the BUG reported at [1]. Abstract advancing the PTE to the next PFN through a pte_next_pfn() function/macro. Link: https://lkml.kernel.org/r/20230920040958.866520-1-willy@infradead.org Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: syzbot+55cc72f8cc3a549119df@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/000000000000d099fa0604f03351@google.com [1] Reviewed-by: Yin Fengwei Cc: Dave Hansen Cc: David Hildenbrand Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1fba072b3dac..af7639c3b0a3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -206,6 +206,14 @@ static inline int pmd_young(pmd_t pmd) #endif #ifndef set_ptes + +#ifndef pte_next_pfn +static inline pte_t pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#endif + /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. @@ -231,7 +239,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, if (--nr == 0) break; ptep++; - pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + pte = pte_next_pfn(pte); } arch_leave_lazy_mmu_mode(); } -- cgit v1.2.3 From 5c590804b6b0ff933ed4e5cee5d76de3a5048d9f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:35 -0400 Subject: maple_tree: add mas_is_active() to detect in-tree walks Patch series "maple_tree: Fix mas_prev() state regression". Pedro Falcato retported an mprotect regression [1] which was bisected back to the iterator changes for maple tree. Root cause analysis showed the mas_prev() running off the end of the VMA space (previous from 0) followed by mas_find(), would skip the first value. This patchset introduces maple state underflow/overflow so the sequence of calls on the maple state will return what the user expects. Users who encounter this bug may see mprotect(), userfaultfd_register(), and mlock() fail on VMAs mapped with address 0. This patch (of 2): Instead of constantly checking each possibility of the maple state, create a fast path that will skip over checking unlikely states. Link: https://lkml.kernel.org/r/20230921181236.509072-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230921181236.509072-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e41c70ac7744..f66f5f78f8cf 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -511,6 +511,15 @@ static inline bool mas_is_paused(const struct ma_state *mas) return mas->node == MAS_PAUSE; } +/* Check if the mas is pointing to a node or not */ +static inline bool mas_is_active(struct ma_state *mas) +{ + if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) + return true; + + return false; +} + /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. -- cgit v1.2.3 From a8091f039c1ebf5cb0d5261e3613f18eb2a5d8b7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:36 -0400 Subject: maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states When updating the maple tree iterator to avoid rewalks, an issue was introduced when shifting beyond the limits. This can be seen by trying to go to the previous address of 0, which would set the maple node to MAS_NONE and keep the range as the last entry. Subsequent calls to mas_find() would then search upwards from mas->last and skip the value at mas->index/mas->last. This showed up as a bug in mprotect which skips the actual VMA at the current range after attempting to go to the previous VMA from 0. Since MAS_NONE may already be set when searching for a value that isn't contained within a node, changing the handling of MAS_NONE in mas_find() would make the code more complicated and error prone. Furthermore, there was no way to tell which limit was hit, and thus which action to take (next or the entry at the current range). This solution is to add two states to track what happened with the previous iterator action. This allows for the expected behaviour of the next command to return the correct item (either the item at the range requested, or the next/previous). Tests are also added and updated accordingly. Link: https://lkml.kernel.org/r/20230921181236.509072-3-Liam.Howlett@oracle.com Link: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Link: https://lore.kernel.org/linux-mm/20230921181236.509072-1-Liam.Howlett@oracle.com/ Fixes: 39193685d585 ("maple_tree: try harder to keep active node with mas_prev()") Signed-off-by: Liam R. Howlett Reported-by: Pedro Falcato Closes: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Closes: https://bugs.archlinux.org/task/79656 Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index f66f5f78f8cf..d01e850b570f 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -428,6 +428,8 @@ struct ma_wr_state { #define MAS_ROOT ((struct maple_enode *)5UL) #define MAS_NONE ((struct maple_enode *)9UL) #define MAS_PAUSE ((struct maple_enode *)17UL) +#define MAS_OVERFLOW ((struct maple_enode *)33UL) +#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) -- cgit v1.2.3 From 935d4f0c6dc8b3533e6e39346de7389a84490178 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 22 Sep 2023 12:58:03 +0100 Subject: mm: hugetlb: add huge page size param to set_huge_pte_at() Patch series "Fix set_huge_pte_at() panic on arm64", v2. This series fixes a bug in arm64's implementation of set_huge_pte_at(), which can result in an unprivileged user causing a kernel panic. The problem was triggered when running the new uffd poison mm selftest for HUGETLB memory. This test (and the uffd poison feature) was merged for v6.5-rc7. Ideally, I'd like to get this fix in for v6.6 and I've cc'ed stable (correctly this time) to get it backported to v6.5, where the issue first showed up. Description of Bug ================== arm64's huge pte implementation supports multiple huge page sizes, some of which are implemented in the page table with multiple contiguous entries. So set_huge_pte_at() needs to work out how big the logical pte is, so that it can also work out how many physical ptes (or pmds) need to be written. It previously did this by grabbing the folio out of the pte and querying its size. However, there are cases when the pte being set is actually a swap entry. But this also used to work fine, because for huge ptes, we only ever saw migration entries and hwpoison entries. And both of these types of swap entries have a PFN embedded, so the code would grab that and everything still worked out. But over time, more calls to set_huge_pte_at() have been added that set swap entry types that do not embed a PFN. And this causes the code to go bang. The triggering case is for the uffd poison test, commit 99aa77215ad0 ("selftests/mm: add uffd unit test for UFFDIO_POISON"), which causes a PTE_MARKER_POISONED swap entry to be set, coutesey of commit 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") - added in v6.5-rc7. Although review shows that there are other call sites that set PTE_MARKER_UFFD_WP (which also has no PFN), these don't trigger on arm64 because arm64 doesn't support UFFD WP. If CONFIG_DEBUG_VM is enabled, we do at least get a BUG(), but otherwise, it will dereference a bad pointer in page_folio(): static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) { VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); return page_folio(pfn_to_page(swp_offset_pfn(entry))); } Fix === The simplest fix would have been to revert the dodgy cleanup commit 18f3962953e4 ("mm: hugetlb: kill set_huge_swap_pte_at()"), but since things have moved on, this would have required an audit of all the new set_huge_pte_at() call sites to see if they should be converted to set_huge_swap_pte_at(). As per the original intent of the change, it would also leave us open to future bugs when people invariably get it wrong and call the wrong helper. So instead, I've added a huge page size parameter to set_huge_pte_at(). This means that the arm64 code has the size in all cases. It's a bigger change, due to needing to touch the arches that implement the function, but it is entirely mechanical, so in my view, low risk. I've compile-tested all touched arches; arm64, parisc, powerpc, riscv, s390, sparc (and additionally x86_64). I've additionally booted and run mm selftests against arm64, where I observe the uffd poison test is fixed, and there are no other regressions. This patch (of 2): In order to fix a bug, arm64 needs to be told the size of the huge page for which the pte is being set in set_huge_pte_at(). Provide for this by adding an `unsigned long sz` parameter to the function. This follows the same pattern as huge_pte_clear(). This commit makes the required interface modifications to the core mm as well as all arches that implement this function (arm64, parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed in a separate commit. No behavioral changes intended. Link: https://lkml.kernel.org/r/20230922115804.2043771-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20230922115804.2043771-2-ryan.roberts@arm.com Fixes: 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") Signed-off-by: Ryan Roberts Reviewed-by: Christophe Leroy [powerpc 8xx] Reviewed-by: Lorenzo Stoakes [vmalloc change] Cc: Alexandre Ghiti Cc: Albert Ou Cc: Alexander Gordeev Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Mike Kravetz Cc: Muchun Song Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Xu Cc: Qi Zheng Cc: Ryan Roberts Cc: SeongJae Park Cc: Sven Schnelle Cc: Uladzislau Rezki (Sony) Cc: Vasily Gorbik Cc: Will Deacon Cc: [6.5+] Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5b2626063f4f..a30686e649f7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -984,7 +984,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1173,7 +1175,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } -- cgit v1.2.3 From 348cbf987ed328682af7d74ce98d9f0e6857f42f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 30 Sep 2023 09:20:20 -0700 Subject: Input: mt - annotate struct input_mt with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct input_mt. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922175036.work.762-kees@kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input/mt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 3b8580bd33c1..2cf89a538b18 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -47,7 +47,7 @@ struct input_mt { unsigned int flags; unsigned int frame; int *red; - struct input_mt_slot slots[]; + struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, -- cgit v1.2.3 From fce9c967820a72f600abbf061d7077861685a14d Mon Sep 17 00:00:00 2001 From: Ingo Rohloff Date: Sat, 26 Aug 2023 22:02:41 +0200 Subject: wifi: mt76: mt7921e: Support MT7992 IP in Xiaomi Redmibook 15 Pro (2023) In the Xiaomi Redmibook 15 Pro (2023) laptop I have got, a wifi chip is used, which according to its PCI Vendor ID is from "ITTIM Technology". This chip works flawlessly with the mt7921e module. The driver doesn't bind to this PCI device, because the Vendor ID from "ITTIM Technology" is not recognized. This patch adds the PCI Vendor ID from "ITTIM Technology" to the list of PCI Vendor IDs and lets the mt7921e driver bind to the mentioned wifi chip. Signed-off-by: Ingo Rohloff Signed-off-by: Felix Fietkau --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2dc75df1437f..6ae1803bcd2f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc -- cgit v1.2.3 From 0b068c714ca9479d2783cc333fff5bc2d4a6d45c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 08:52:16 +0000 Subject: net: add DEV_STATS_READ() helper Companion of DEV_STATS_INC() & DEV_STATS_ADD(). This is going to be used in the series. Use it in macsec_get_stats64(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e520c14eb8c..e070a4540fba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5236,5 +5236,6 @@ extern struct net_device *blackhole_netdev; #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) +#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */ -- cgit v1.2.3 From fa4c4507099f781ca89a748c480af9cf97629726 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Sep 2023 16:31:35 +0200 Subject: iommu: Allow .iotlb_sync_map to fail and handle s390's -ENOMEM return On s390 when using a paging hypervisor, .iotlb_sync_map is used to sync mappings by letting the hypervisor inspect the synced IOVA range and updating a shadow table. This however means that .iotlb_sync_map can fail as the hypervisor may run out of resources while doing the sync. This can be due to the hypervisor being unable to pin guest pages, due to a limit on mapped addresses such as vfio_iommu_type1.dma_entry_limit or lack of other resources. Either way such a failure to sync a mapping should result in a DMA_MAPPING_ERROR. Now especially when running with batched IOTLB flushes for unmap it may be that some IOVAs have already been invalidated but not yet synced via .iotlb_sync_map. Thus if the hypervisor indicates running out of resources, first do a global flush allowing the hypervisor to free resources associated with these mappings as well a retry creating the new mappings and only if that also fails report this error to callers. Reviewed-by: Lu Baolu Reviewed-by: Matthew Rosato Acked-by: Jernej Skrabec # sun50i Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-1-9e5fc4dacc36@linux.ibm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 64bd20142cbe..1eb638752781 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -350,8 +350,8 @@ struct iommu_domain_ops { struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); -- cgit v1.2.3 From 32d5bc8b09c7cc48c511809e7c3b1755c7ecc5fa Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Sep 2023 16:31:39 +0200 Subject: iommu/dma: Allow a single FQ in addition to per-CPU FQs In some virtualized environments, including s390 paged memory guests, IOTLB flushes are used to update IOMMU shadow tables. Due to this, they are much more expensive than in typical bare metal environments or non-paged s390 guests. In addition they may parallelize poorly in virtualized environments. This changes the trade off for flushing IOVAs such that minimizing the number of IOTLB flushes trumps any benefit of cheaper queuing operations or increased paralellism. In this scenario per-CPU flush queues pose several problems. Firstly per-CPU memory is often quite limited prohibiting larger queues. Secondly collecting IOVAs per-CPU but flushing via a global timeout reduces the number of IOVAs flushed for each timeout especially on s390 where PCI interrupts may not be bound to a specific CPU. Let's introduce a single flush queue mode that reuses the same queue logic but only allocates a single global queue. This mode is selected by dma-iommu if a newly introduced .shadow_on_flush flag is set in struct dev_iommu. As a first user the s390 IOMMU driver sets this flag during probe_device. With the unchanged small FQ size and timeouts this setting is worse than per-CPU queues but a follow up patch will make the FQ size and timeout variable. Together this allows the common IOVA flushing code to more closely resemble the global flush behavior used on s390's previous internal DMA API implementation. Link: https://lore.kernel.org/all/9a466109-01c5-96b0-bf03-304123f435ee@arm.com/ Acked-by: Robin Murphy Reviewed-by: Matthew Rosato #s390 Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-5-9e5fc4dacc36@linux.ibm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1eb638752781..0c4d8ae985ac 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -424,6 +424,7 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -439,6 +440,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 0d293714ac32650bfb669ceadf7cc2fad8161401 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 15:10:27 +0300 Subject: RDMA/mlx5: Send events from IB driver about device affiliation state Send blocking events from IB driver whenever the device is done being affiliated or if it is removed from an affiliation. This is useful since now the EN driver can register to those event and know when a device is affiliated or not. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/a7491c3e483cfd8d962f5f75b9a25f253043384a.1695296682.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..26333d602a50 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,8 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..5ca4e085d813 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1027,6 +1027,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); -- cgit v1.2.3 From ef36ffcb381096ed32c309c342a02bf62939c503 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 15:10:30 +0300 Subject: net/mlx5: Add alias flow table bits Add all the capabilities needed to check for alias object support. As well as all the fields or commands needed for its creation and the creation of flow table that is able to jump to an alias object. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/544c030f2a78c4adf3fe6b64f97a39cc1bbdabb9.1695296682.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 56 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3265bfcb3156..23f9780adb83 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -312,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -1934,6 +1935,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1952,7 +1961,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migration_in_chunks[0x1]; u8 reserved_at_d1[0xf]; - u8 reserved_at_e0[0xc0]; + u8 cross_vhca_object_to_object_supported[0x20]; + + u8 allowed_object_for_other_vhca_access[0x40]; + + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -6371,6 +6384,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6393,6 +6428,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -11919,6 +11972,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { -- cgit v1.2.3 From d424348b060d87f92cc59d8e6ea9c612c5b708f5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 28 Sep 2023 19:45:12 +0300 Subject: vdpa/mlx5: Expose descriptor group mkey hw capability Necessary for improved live migration flow. Actual support will be added in a downstream patch. Reviewed-by: Gal Pressman Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/r/20230928164550.980832-3-dtatulea@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..3388007c645f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1231,7 +1231,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 reserved_at_d4[0xc]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; -- cgit v1.2.3 From a60359ea32251399c00d934981f5a6fa25ec917f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:45:51 +0200 Subject: usb: renesas_usbhs: remove boilerplate from header file There is a SPDX entry, so we can remove the boilerplate. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230922064551.4663-1-wsa+renesas@sang-engineering.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/renesas_usbhs.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d418c55523a7..372898d9eeb0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -5,16 +5,6 @@ * Copyright (C) 2011 Renesas Solutions Corp. * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef RENESAS_USB_H #define RENESAS_USB_H -- cgit v1.2.3 From a17fae8fc38e91026f116a85c5068668fbf9848a Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:39 -0700 Subject: usb: typec: Add Displayport Alternate Mode 2.1 Support Displayport Alternate mode 2.1 requires configuration for additional cable details such as signalling for cable, UHBR13.5 Support, Cable type and DPAM version. These details can be used with mux drivers to configure SOP DP configuration for Displayport Alternate mode 2.1. This change also includes pertinent cable signalling support in displayport alternate mode. Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-2-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_dp.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 8d09c2f0a9b8..1f358098522d 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -67,8 +67,10 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */ -#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */ +#define DP_CAP_DP_SIGNALLING(_cap_) (((_cap_) & GENMASK(5, 2)) >> 2) +#define DP_CAP_SIGNALLING_HBR3 1 +#define DP_CAP_SIGNALLING_UHBR10 2 +#define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) @@ -78,6 +80,13 @@ enum { DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_UHBR_13_5_SUPPORT BIT(26) +#define DP_CAP_CABLE_TYPE(_cap_) (((_cap_) & GENMASK(29, 28)) >> 28) +#define DP_CAP_CABLE_TYPE_PASSIVE 0 +#define DP_CAP_CABLE_TYPE_RE_TIMER 1 +#define DP_CAP_CABLE_TYPE_RE_DRIVER 2 +#define DP_CAP_CABLE_TYPE_OPTICAL 3 +#define DP_CAP_DPAM_VERSION BIT(30) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) @@ -97,13 +106,24 @@ enum { #define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3) #define DP_CONF_UFP_U_AS_DFP_D BIT(0) #define DP_CONF_UFP_U_AS_UFP_D BIT(1) -#define DP_CONF_SIGNALING_DP BIT(2) -#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */ +#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2) +#define DP_CONF_SIGNALLING_SHIFT 2 +#define DP_CONF_SIGNALLING_HBR3 1 +#define DP_CONF_SIGNALLING_UHBR10 2 +#define DP_CONF_SIGNALLING_UHBR20 3 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8 #define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8) /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) #define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_UHBR13_5_SUPPORT BIT(26) +#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) +#define DP_CONF_CABLE_TYPE_SHIFT 28 +#define DP_CONF_CABLE_TYPE_PASSIVE 0 +#define DP_CONF_CABLE_TYPE_RE_TIMER 1 +#define DP_CONF_CABLE_TYPE_RE_DRIVER 2 +#define DP_CONF_CABLE_TYPE_OPTICAL 3 +#define DP_CONF_DPAM_VERSION BIT(30) #endif /* __USB_TYPEC_DP_H */ -- cgit v1.2.3 From c365b1e1f40499472433cc8fca3d0ea280ead52a Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:40 -0700 Subject: usb: typec: Add Active or Passive cable defination to cable discover mode VDO As per USB Type-C Connector specification v2.2 section F.2.6, BIT25 represents Active or Passive cable. Added BIT25 defination to the Thunderbolt 3 cable discover mode VDO. Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-3-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 63dd44b72e0c..c7a2153bd6f5 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -46,6 +46,7 @@ struct typec_thunderbolt_data { #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) +#define TBT_CABLE_ACTIVE_PASSIVE BIT(25) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) #define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) -- cgit v1.2.3 From f9ee6043283a78c84adf6ef3cef7a085eee4130f Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:41 -0700 Subject: usb: pd: Add helper macro to get Type C cable speed Added a helper macro to get the Type C cable speed when provided the cable VDO. Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-4-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index b057250704e8..3a747938cdab 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -376,6 +376,7 @@ | ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \ | (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7)) +#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7) #define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* -- cgit v1.2.3 From 1b8a62937e0b23c41956feec778ca7776a01df48 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 26 Sep 2023 15:25:31 +0200 Subject: ASoC: ti: Convert TWL4030 to use GPIO descriptors The TWL4030 is actually only ever populated from the device tree, so we can just pass the right device and headphone jack GPIO name to snd_soc_jack_add_gpios() and it will pick the right GPIO right from the device tree. The platform data patch is unused (no in-tree users of the pdata method) but these can use GPIO descriptor tables rather than global GPIO numbers if they need this. Signed-off-by: Linus Walleij Acked-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230926-descriptors-asoc-ti-v1-3-60cf4f8adbc5@linaro.org Signed-off-by: Mark Brown --- include/linux/platform_data/omap-twl4030.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 0dd851ea1c72..7fcb55fe21c9 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -37,9 +37,6 @@ struct omap_tw4030_pdata { bool has_digimic0; bool has_digimic1; u8 has_linein; - - /* Jack detect GPIO or <= 0 if it is not implemented */ - int jack_detect; }; #endif /* _OMAP_TWL4030_H_ */ -- cgit v1.2.3 From 52e24f8c0a102ac76649c6b71224fadcc82bd5da Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 11 Sep 2023 14:56:52 +0200 Subject: usb: pci-quirks: handle HAS_IOPORT dependency for AMD quirk In a future patch HAS_IOPORT=n will result in inb()/outb() and friends not being declared. In the pci-quirks case the I/O port acceses are used in the quirks for several AMD south bridges, Add a config option for the AMD quirks to depend on HAS_IOPORT and #ifdef the quirk code. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230911125653.1393895-3-schnelle@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 61d4f0b793dc..00724b4f6e12 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -484,8 +484,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); +#ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + if (!usb_hcd_amd_remote_wakeup_quirk(dev)) + return false; + if (driver->flags & (HCD_USB11 | HCD_USB3)) + return true; + return false; +} +#else /* CONFIG_USB_PCI_AMD */ +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + return false; +} +#endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ -- cgit v1.2.3 From 568441b7d45fc7198a89b522d721428f2005c356 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Oct 2023 17:22:40 +0300 Subject: usb: pd: Exposing the Peak Current value of Fixed Supplies to user space Exposing the value of the field as is. The Peak Current value has to be interpreted as described in Table 6-10 (Fixed Power Source Peak Current Capability) of the USB Power Delivery Specification, but that interpretation will be done in user space, not in kernel. Suggested-by: Douglas Gilbert Reviewed-by: Guenter Roeck Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231002142240.2641962-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index c59fb79a42e8..eb626af0e4e7 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -228,6 +228,7 @@ enum pd_pdo_type { #define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */ #define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ #define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_PEAK_CURR_SHIFT 20 #define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ #define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ -- cgit v1.2.3 From 3551ff7c5cfff4dc27fdcd14fa286edc08d78088 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 28 Aug 2023 17:43:02 +0200 Subject: usb: gadget: clarify usage of USB_GADGET_DELAYED_STATUS USB_GADGET_DELAYED_STATUS was introduced in commit 1b9ba000177e ("usb: gadget: composite: Allow function drivers to pause control transfers"). It was initially intended for the composite framework to allow delaying completing the status stage of a SET_CONFIGURATION request until all functions are ready. Unfortunately, that commit had an unintended side-effect of returning USB_GADGET_DELAYED_STATUS from the ->setup() call of the composite framework gadget driver. As a result of this and the incomplete documentation, some UDC drivers started relying on USB_GADGET_DELAYED_STATUS to decide when to avoid autocompleting the status stage for 0-length control transfers. dwc3 was the first in commit 5bdb1dcc6330 ("usb: dwc3: ep0: handle delayed_status again"). And a number of other UDC drivers followed later, probably relying on the dwc3 behavior as a reference. Unfortunately, this violated the interface between the UDC and the gadget driver for 0-length control transfers: the UDC driver must only proceed with the status stage for a 0-length control transfer once the gadget driver queued a response to EP0. As a result, a few gadget drivers are partially broken when used with a UDC that only delays the status stage for 0-length transfers when USB_GADGET_DELAYED_STATUS is returned from the setup() callback. This includes Raw Gadget and GadgetFS. For FunctionFS, a workaround was added in commit 946ef68ad4e4 ("usb: gadget: ffs: Let setup() return USB_GADGET_DELAYED_STATUS") and commit 4d644abf2569 ("usb: gadget: f_fs: Only return delayed status when len is 0"). The proper solution to this issue would be to contain USB_GADGET_DELAYED_STATUS within the composite framework and make all UDC drivers to not complete the status stage for 0-length requests on their own. Unfortunately, there is quite a few UDC drivers that need to get fixed and the required changes for some of them are not trivial. For now, update the comments to clarify that USB_GADGET_DELAYED_STATUS must not be used by the UDC drivers. The following two commits also add workarounds to Raw Gadget and GadgetFS to make them compatible with the broken UDC drivers until they are fixed. Acked-by: Alan Stern Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/7f0ee06c68c7241c844cd50f8565fdd5ead79b1b.1693237258.git.andreyknvl@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 8 ++++++++ include/linux/usb/gadget.h | 9 +++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 6014340ba980..af3cd2aae4bc 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -35,6 +35,14 @@ * are ready. The control transfer will then be kept from completing till * all the function drivers that requested for USB_GADGET_DELAYED_STAUS * invoke usb_composite_setup_continue(). + * + * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they + * must delay completing the status stage for 0-length control transfers + * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from + * the gadget driver's setup() callback. + * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS, + * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS + * must be contained within the composite framework. */ #define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 75bda0783395..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -711,6 +711,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget) * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * + * The gadget driver's setup() callback does not have to queue a response to + * ep0 within the setup() call, the driver can do it after setup() returns. + * The UDC driver must wait until such a response is queued before proceeding + * with the data/status stages of the control transfer. + * + * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS + * being returned from the setup() callback, which is a bug. See the comment + * next to USB_GADGET_DELAYED_STATUS for details. + * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * -- cgit v1.2.3 From 5234193ee2b997e59326b047610a8f3f64a0ce02 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:15:17 -0700 Subject: ceph: Annotate struct ceph_osd_request with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ceph_osd_request. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Ilya Dryomov Cc: Xiubo Li Cc: Jeff Layton Cc: ceph-devel@vger.kernel.org Reviewed-by: "Gustavo A. R. Silva" Reviewed-by: Xiubo Li Link: https://lore.kernel.org/r/20230915201517.never.373-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bf9823956758..b8610e9d2471 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -278,7 +278,7 @@ struct ceph_osd_request { int r_attempts; u32 r_map_dne_bound; - struct ceph_osd_req_op r_ops[]; + struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops); }; struct ceph_request_redirect { -- cgit v1.2.3 From 210d4e9c732fa87b584af72faae96c037d9d7957 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:28:44 -0700 Subject: ipv4/igmp: Annotate struct ip_sf_socklist with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ip_sf_socklist. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Martin KaFai Lau Cc: Alexei Starovoitov Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922172858.3822653-2-keescook@chromium.org Signed-off-by: Jakub Kicinski --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index ebf4349a53af..5171231f70a8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,7 +39,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[]; + __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ -- cgit v1.2.3 From b7768e67af9a5b6d6101cbfc146969fedf8df4be Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 28 Sep 2023 16:55:33 +0800 Subject: regulator: mt6358: Add missing regulators for MT6366 When support for the MT6366 PMIC regulators was added, it was assumed that it had the same functionality as MT6358. In reality there are differences. A few regulators have different ranges, or were renamed and repurposed, or removed altogether. Add the 3 regulators that were missing from the original submission. These are added for completeness. VSRAM_CORE is not used in existing projects. VM18 and VMDDR feed DRAM related consumers, and are not used in-kernel. Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Acked-by: Lee Jones Link: https://lore.kernel.org/r/20230928085537.3246669-11-wenst@chromium.org Signed-off-by: Mark Brown --- include/linux/mfd/mt6358/registers.h | 17 +++++++++++++++++ include/linux/regulator/mt6358-regulator.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 5ea2590be710..d83e87298ac4 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -294,4 +294,21 @@ #define MT6358_AUD_TOP_INT_CON0 0x2228 #define MT6358_AUD_TOP_INT_STATUS0 0x2234 +/* + * MT6366 has no VCAM*, but has other regulators in its place. The names + * keep the MT6358 prefix for ease of use in the regulator driver. + */ +#define MT6358_LDO_VSRAM_CON5 0x1bf8 +#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0 +#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1 +#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2 +#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0 +#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1 +#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2 +#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0 +#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6 +#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8 +#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0 +#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0 + #endif /* __MFD_MT6358_REGISTERS_H__ */ diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index c71a6a9fce7a..562386f9b80e 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -86,6 +86,9 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, + MT6366_ID_VM18, + MT6366_ID_VMDDR, + MT6366_ID_VSRAM_CORE, MT6366_ID_RG_MAX, }; -- cgit v1.2.3 From d844fe65f0957024c3e1b0bf2a0615246184d9bc Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 7 Aug 2023 20:03:57 -0700 Subject: sched/headers: Move 'struct sched_param' out of uapi, to work around glibc/musl breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both glibc and musl define 'struct sched_param' in sched.h, while kernel has it in uapi/linux/sched/types.h, making it cumbersome to use sched_getattr(2) or sched_setattr(2) from userspace. For example, something like this: #include #include struct sched_attr sa; will result in "error: redefinition of ‘struct sched_param’" (note the code doesn't need sched_param at all -- it needs struct sched_attr plus some stuff from sched.h). The situation is, glibc is not going to provide a wrapper for sched_{get,set}attr, thus the need to include linux/sched_types.h directly, which leads to the above problem. Thus, the userspace is left with a few sub-par choices when it wants to use e.g. sched_setattr(2), such as maintaining a copy of struct sched_attr definition, or using some other ugly tricks. OTOH, 'struct sched_param' is well known, defined in POSIX, and it won't be ever changed (as that would break backward compatibility). So, while 'struct sched_param' is indeed part of the kernel uapi, exposing it the way it's done now creates an issue, and hiding it (like this patch does) fixes that issue, hopefully without creating another one: common userspace software rely on libc headers, and as for "special" software (like libc), it looks like glibc and musl do not rely on kernel headers for 'struct sched_param' definition (but let's Cc their mailing lists in case it's otherwise). The alternative to this patch would be to move struct sched_attr to, say, linux/sched.h, or linux/sched/attr.h (the new file). Oh, and here is the previous attempt to fix the issue: https://lore.kernel.org/all/20200528135552.GA87103@google.com/ While I support Linus arguments, the issue is still here and needs to be fixed. [ mingo: Linus is right, this shouldn't be needed - but on the other hand I agree that this header is not really helpful to user-space as-is. So let's pretend that is only about sched_attr, and call this commit a workaround for user-space breakage that it in reality is ... Also, remove the Fixes tag. ] Signed-off-by: Kir Kolyshkin Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230808030357.1213829-1-kolyshkin@gmail.com --- include/linux/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc37ae787e33..e4235bbfad77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,7 +63,6 @@ struct robust_list_head; struct root_domain; struct rq; struct sched_attr; -struct sched_param; struct seq_file; struct sighand_struct; struct signal_struct; @@ -370,6 +369,10 @@ extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif +struct sched_param { + int sched_priority; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ -- cgit v1.2.3 From affccb16c117d188eb09495cbdea149cecbf00b9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 6 Sep 2023 11:22:32 +0200 Subject: ata: ahci: print the lpm policy on boot The target LPM policy can be set using either a Kconfig or a kernel module parameter. However, if the board type is set to anything but board_ahci_low_power, then the LPM policy will overridden and set to ATA_LPM_UNKNOWN. Additionally, if the default suspend is suspend to idle, depending on the hardware capabilities of the HBA, ahci_update_initial_lpm_policy() might override the LPM policy to either ATA_LPM_MIN_POWER_WITH_PARTIAL or ATA_LPM_MIN_POWER. All this means that it is very hard to know which LPM policy a user will actually be using on a given system. In order to make it easier to debug LPM related issues, print the LPM policy on boot. One common LPM related issue is that the device fails to link up. Because of that, we cannot add this print to ata_dev_configure(), as that function is only called after a successful link up. Instead, add the info using ata_port_desc(), with the help of a new ata_port_desc_misc() helper. The port description is printed once per port during boot. Before changes: ata1: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780100 irq 170 ata2: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780180 irq 170 After changes: ata1: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780100 irq 170 lpm-pol 4 ata2: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780180 irq 170 lpm-pol 4 Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- include/linux/libata.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2a7d2af0ed80..54a217868ad0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1561,6 +1561,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...); extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif +static inline void ata_port_desc_misc(struct ata_port *ap, int irq) +{ + ata_port_desc(ap, "irq %d", irq); + ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); +} static inline bool ata_tag_internal(unsigned int tag) { -- cgit v1.2.3 From 0e19548145d863c48c6562add768a05231d768c9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:52:11 -0700 Subject: ata: libata: Annotate struct ata_cpr_log with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ata_cpr_log. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Damien Le Moal Cc: linux-ide@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 54a217868ad0..b39891320271 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -656,7 +656,7 @@ struct ata_cpr { struct ata_cpr_log { u8 nr_cpr; - struct ata_cpr cpr[]; + struct ata_cpr cpr[] __counted_by(nr_cpr); }; struct ata_device { -- cgit v1.2.3 From 1b947279798fd51bef46e8241102f5b896add021 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Aug 2023 13:10:28 +0900 Subject: ata: libata: Cleanup inline DMA helper functions Simplify the inline DMA helper functions ata_using_mwdma(), ata_using_udma() and ata_dma_enabled() to directly return as a boolean the result of their test condition. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- include/linux/libata.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b39891320271..1dbb14daccfa 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1886,23 +1886,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies, change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ -static inline int ata_using_mwdma(struct ata_device *adev) +static inline bool ata_using_mwdma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) - return 1; - return 0; + return adev->dma_mode >= XFER_MW_DMA_0 && + adev->dma_mode <= XFER_MW_DMA_4; } -static inline int ata_using_udma(struct ata_device *adev) +static inline bool ata_using_udma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) - return 1; - return 0; + return adev->dma_mode >= XFER_UDMA_0 && + adev->dma_mode <= XFER_UDMA_7; } -static inline int ata_dma_enabled(struct ata_device *adev) +static inline bool ata_dma_enabled(struct ata_device *adev) { - return (adev->dma_mode == 0xFF ? 0 : 1); + return adev->dma_mode != 0xFF; } /************************************************************************** -- cgit v1.2.3 From c2a36609dab3b7c937ab95bfb8b98e72391f772e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 19 Sep 2023 10:51:47 +0200 Subject: tty: switch tty_{,un}throttle_safe() to return a bool They return 0 or 1 -- a boolean value, so make it clear than noone should expect negative or other values. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20230919085156.1578-7-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index f002d0f25db7..59d675f345e9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -416,8 +416,8 @@ unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); -int tty_throttle_safe(struct tty_struct *tty); -int tty_unthrottle_safe(struct tty_struct *tty); +bool tty_throttle_safe(struct tty_struct *tty); +bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); -- cgit v1.2.3 From 71067fb797e074c468221793fe93ba1b58350f1e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 19 Sep 2023 10:51:50 +0200 Subject: tty: fix kernel-doc for functions in tty.h tty_kref_get() is already included in Documentation, but is not properly formatted. Fix this. tty_get_baud_rate() is neither properly formatted, nor is included. Fix both. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20230919085156.1578-10-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 59d675f345e9..4b6340ac2af2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -390,14 +390,12 @@ int vcs_init(void); extern const struct class tty_class; /** - * tty_kref_get - get a tty reference - * @tty: tty device + * tty_kref_get - get a tty reference + * @tty: tty device * - * Return a new reference to a tty object. The caller must hold - * sufficient locks/counts to ensure that their existing reference cannot - * go away + * Returns: a new reference to a tty object. The caller must hold sufficient + * locks/counts to ensure that their existing reference cannot go away */ - static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) @@ -435,14 +433,13 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. + * Returns: the baud rate as an integer for this terminal. The termios lock + * must be held by the caller and the terminal bit flags may be updated. * - * Locking: none + * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { -- cgit v1.2.3 From 29bff582b74ed0bdb7e6986482ad9e6799ea4d2f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Sep 2023 21:41:28 -0700 Subject: serial: core: fix kernel-doc for uart_port_unlock_irqrestore() Fix the function name to avoid a kernel-doc warning: include/linux/serial_core.h:666: warning: expecting prototype for uart_port_lock_irqrestore(). Prototype was for uart_port_unlock_irqrestore() instead Fixes: b0af4bcb4946 ("serial: core: Provide port lock wrappers") Signed-off-by: Randy Dunlap Cc: Thomas Gleixner Cc: John Ogness Cc: linux-serial@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Reviewed-by: John Ogness Link: https://lore.kernel.org/r/20230927044128.4748-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3091c62ec37b..89f7b6c63598 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -658,7 +658,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) } /** - * uart_port_lock_irqrestore - Unlock the UART port, restore interrupts + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ -- cgit v1.2.3 From 3cd39bc3b11b8d34b7d7c961a35fdfd18b0ebf75 Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Tue, 3 Oct 2023 14:59:53 +0300 Subject: kernel.h: Move ARRAY_SIZE() to a separate header Touching files so used for the kernel, forces 'make' to recompile most of the kernel. Having those definitions in more granular files helps avoid recompiling so much of the kernel. Signed-off-by: Alejandro Colomar Reviewed-by: Giovanni Cabiddu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230817143352.132583-2-lucas.segarra.fernandez@intel.com [andy: reduced to cover only string.h for now] Signed-off-by: Andy Shevchenko --- include/linux/array_size.h | 13 +++++++++++++ include/linux/kernel.h | 7 +------ include/linux/string.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 include/linux/array_size.h (limited to 'include/linux') diff --git a/include/linux/array_size.h b/include/linux/array_size.h new file mode 100644 index 000000000000..06d7d83196ca --- /dev/null +++ b/include/linux/array_size.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ARRAY_SIZE_H +#define _LINUX_ARRAY_SIZE_H + +#include + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cee8fe87e9f4..d9ad21058eed 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -50,12 +51,6 @@ #define READ 0 #define WRITE 1 -/** - * ARRAY_SIZE - get the number of elements in array @arr - * @arr: array to be sized - */ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..3c920b6d609b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include #include /* for inline */ #include /* for size_t */ #include /* for NULL */ -- cgit v1.2.3 From 82cc14c9930c7613da2fcb41a8d4f90c8b4cb048 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Oct 2023 15:10:11 +0300 Subject: pinctrl: Replace kernel.h by what is actually being used The kernel.h is a mess of unrelated things and we only used it as a proxy to array_size.h, hence switch from former to the latter. While at it, group and sort the headers where it makes sense. Signed-off-by: Andy Shevchenko --- include/linux/pinctrl/machine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0639b36f43c5..ee8803f6ad07 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include /* ARRAY_SIZE() */ +#include #include -- cgit v1.2.3 From 75cec20345fa8e05a2b5f861fada95ad8e165257 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 8 Sep 2023 16:32:15 +0200 Subject: bpf: Remove xdp_do_flush_map(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_do_flush_map() can be removed because there is no more user in tree. Remove xdp_do_flush_map(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20230908143215.869913-3-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 27406aee2d40..e8822bd595f9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1025,12 +1025,6 @@ int xdp_do_redirect_frame(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush(void); -/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as - * it is no longer only flushing maps. Keep this define for compatibility - * until all drivers are updated - do not use xdp_do_flush_map() in new code! - */ -#define xdp_do_flush_map xdp_do_flush - void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET -- cgit v1.2.3 From ccab211af3c2b90ed792eb5f33707d2f0d59fe50 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Mon, 10 Jul 2023 18:51:24 +0000 Subject: syscalls: Cleanup references to sys_lookup_dcookie() commit 'be65de6b03aa ("fs: Remove dcookies support")' removed the syscall definition for lookup_dcookie. However, syscall tables still point to the old sys_lookup_dcookie() definition. Update syscall tables of all architectures to directly point to sys_ni_syscall() instead. Signed-off-by: Sohil Mehta Reviewed-by: Randy Dunlap Acked-by: Namhyung Kim # for perf Acked-by: Russell King (Oracle) Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 1 - include/linux/syscalls.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 1cfa4f0f490a..233f61ec8afc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..a031613bf966 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -355,7 +355,6 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, -- cgit v1.2.3 From 26dd68d293fd1c5ac966fb5dd5f6d89de322a541 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 12 Sep 2023 07:59:31 -0400 Subject: overflow: add DEFINE_FLEX() for on-stack allocs Add DEFINE_FLEX() macro for on-stack allocations of structs with flexible array member. Expose __struct_size() macro outside of fortify-string.h, as it could be used to read size of structs allocated by DEFINE_FLEX(). Move __member_size() alongside it. -Kees Using underlying array for on-stack storage lets us to declare known-at-compile-time structures without kzalloc(). Actual usage for ice driver is in following patches of the series. Missing __has_builtin() workaround is moved up to serve also assembly compilation with m68k-linux-gcc, see [1]. Error was (note the .S file extension): In file included from ../include/linux/linkage.h:5, from ../arch/m68k/fpsp040/skeleton.S:40: ../include/linux/compiler_types.h:331:5: warning: "__has_builtin" is not defined, evaluates to 0 [-Wundef] 331 | #if __has_builtin(__builtin_dynamic_object_size) | ^~~~~~~~~~~~~ ../include/linux/compiler_types.h:331:18: error: missing binary operator before token "(" 331 | #if __has_builtin(__builtin_dynamic_object_size) | ^ [1] https://lore.kernel.org/netdev/202308112122.OuF0YZqL-lkp@intel.com/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Przemek Kitszel Link: https://lore.kernel.org/r/20230912115937.1645707-2-przemyslaw.kitszel@intel.com Signed-off-by: Jakub Kicinski --- include/linux/compiler_types.h | 32 +++++++++++++++++++++----------- include/linux/fortify-string.h | 4 ---- include/linux/overflow.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c523c6683789..6f1ca49306d2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -2,6 +2,15 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #ifndef __ASSEMBLY__ /* @@ -134,17 +143,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif -/* Builtins */ - -/* - * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. - * In the meantime, to support gcc < 10, we implement __has_builtin - * by hand. - */ -#ifndef __has_builtin -#define __has_builtin(x) (0) -#endif - /* Compiler specific macros. */ #ifdef __clang__ #include @@ -352,6 +350,18 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif +/* + * When the size of an allocated object is needed, use the best available + * mechanism to find it. (For cases where sizeof() cannot be used.) + */ +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index da51a83b2829..1e7711185ec6 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -93,13 +93,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) -#define __struct_size(p) __builtin_dynamic_object_size(p, 0) -#define __member_size(p) __builtin_dynamic_object_size(p, 1) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) -#define __struct_size(p) __builtin_object_size(p, 0) -#define __member_size(p) __builtin_object_size(p, 1) #endif #define __compiletime_lessthan(bounds, length) ( \ diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f9b60313eaea..7b5cf4a5cd19 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -309,4 +309,39 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: initializer expression (could be empty for no init). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer) \ + _Static_assert(__builtin_constant_p(count), \ + "onstack flex array members require compile-time const count"); \ + union { \ + u8 bytes[struct_size_t(type, member, count)]; \ + type obj; \ + } name##_u initializer; \ + type *name = (type *)&name##_u + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @type structure with a trailing + * flexible array member. + * Use __struct_size(@name) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(type, name, member, count) \ + _DEFINE_FLEX(type, name, member, count, = {}) + #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From eeb6d1d6f4ec0e304608f72c3ead584bbb155714 Mon Sep 17 00:00:00 2001 From: GuoHua Cheng Date: Wed, 27 Sep 2023 13:56:08 +0800 Subject: PNP: Clean up coding style in pnp.h Address the following checkpatch complaints: ERROR: "foo * bar" should be "foo *bar" ERROR: space required after that ';' (ctx:VxV) Signed-off-by: GuoHua Cheng [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/pnp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index c2a7cfbca713..267fb8a4fb6e 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) struct pnp_fixup { char id[7]; - void (*quirk_function) (struct pnp_dev * dev); /* fixup function */ + void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; /* config parameters */ @@ -419,8 +419,8 @@ struct pnp_protocol { /* protocol specific suspend/resume */ bool (*can_wakeup) (struct pnp_dev *dev); - int (*suspend) (struct pnp_dev * dev, pm_message_t state); - int (*resume) (struct pnp_dev * dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number */ @@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } -- cgit v1.2.3 From 473267a4911f2469722c74ca58087d951072f72a Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Mon, 25 Sep 2023 14:47:11 -0700 Subject: net: add sysctl to disable rfc4862 5.5.3e lifetime handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a sysctl to opt-out of RFC4862 section 5.5.3e's valid lifetime derivation mechanism. RFC4862 section 5.5.3e prescribes that the valid lifetime in a Router Advertisement PIO shall be ignored if it less than 2 hours and to reset the lifetime of the corresponding address to 2 hours. An in-progress 6man draft (see draft-ietf-6man-slaac-renum-07 section 4.2) is currently looking to remove this mechanism. While this draft has not been moving particularly quickly for other reasons, there is widespread consensus on section 4.2 which updates RFC4862 section 5.5.3e. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: Jen Linkova Signed-off-by: Patrick Rohr Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230925214711.959704-1-prohr@google.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e400ff757f13..5e605e384aac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -82,6 +82,7 @@ struct ipv6_devconf { __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From 2632bb84d1d53cfd6cf65261064273ded4f759d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Sep 2023 20:24:34 -0700 Subject: mm: Remove unused vm_brk() With fs/binfmt_elf.c fully refactored to use the new elf_load() helper, there are no more users of vm_brk(), so remove it. Cc: Andrew Morton Cc: linux-mm@kvack.org Suggested-by: Eric Biederman Tested-by: Pedro Falcato Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20230929032435.2391507-6-keescook@chromium.org Signed-off-by: Kees Cook --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..216dd0c6dcf8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3308,8 +3308,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif -/* These take the mm semaphore themselves */ -extern int __must_check vm_brk(unsigned long, unsigned long); +/* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, -- cgit v1.2.3 From 2f3dd39e2b492bec366487a2c9bcbdbd7792f77c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 2 Oct 2023 17:34:28 -0700 Subject: platform/chrome: cros_ec_proto: Mark outdata as const The 'outdata' is copied to the data buffer in cros_ec_cmd() before being sent over to the EC. Mark the argument as const so that callers can pass const pointers to this function and so that callers know the data won't be modified. Cc: Prashant Malani Signed-off-by: Stephen Boyd Acked-by: Prashant Malani Link: https://lore.kernel.org/r/20231003003429.1378109-5-swboyd@chromium.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4f9f756bc17c..8865e350c12a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -258,7 +258,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); /** -- cgit v1.2.3 From 8874e414fe78718d0f2861fe511cecbd1cd73f4d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 3 Oct 2023 11:49:14 -0700 Subject: platform/x86/intel/tpmi: Add defines to get version information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add defines to get major and minor version from a TPMI version field value. This will avoid code duplication to convert in every feature driver. Also add define for invalid version field. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20231003184916.1860084-2-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_tpmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 04d937ad4dc4..ee07393445f9 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -6,6 +6,12 @@ #ifndef _INTEL_TPMI_H_ #define _INTEL_TPMI_H_ +#include + +#define TPMI_VERSION_INVALID 0xff +#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) +#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) + /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance * @package_id: CPU Package id -- cgit v1.2.3 From 968118fcf0546ef74cf306bf8f8c1e06efff10e3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 12 Sep 2023 10:44:52 +0200 Subject: OMAP/gpio: drop MPUIO static base The OMAP GPIO driver hardcodes the MPIO chip base, but there is no point: we have already moved all consumers over to using descriptor look-ups. Drop the MPUIO GPIO base and use dynamic assignment. Root out the unused instances of the OMAP_MPUIO() macro and delete the unused OMAP_GPIO_IS_MPUIO() macro. Signed-off-by: Linus Walleij Reviewed-by: Tony Lindgren Tested-by: Janusz Krzysztofik Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-omap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index f377817ce75c..cdd8cfb424f5 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -144,9 +144,6 @@ #define OMAP_MAX_GPIO_LINES 192 -#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) -#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) - #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; -- cgit v1.2.3 From 36aa129f221c9070afd8dff03154ab49702a5b1b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:21 +0200 Subject: gpiolib: make gpio_device_get() and gpio_device_put() public In order to start migrating away from accessing struct gpio_chip by users other than their owners, let's first make the reference management functions for the opaque struct gpio_device public in the driver.h header. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 8f0859ba7065..a2060dc3344b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -606,6 +606,9 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_get(struct gpio_device *gdev); +void gpio_device_put(struct gpio_device *gdev); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 9e4555d1e54a18946d7ca363b9fc8ed1fe7dfde4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:22 +0200 Subject: gpiolib: add support for scope-based management to gpio_device As the few users that need to get the reference to the GPIO device often release it right after inspecting its properties, let's add support for the automatic reference release to struct gpio_device. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko --- include/linux/gpio/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a2060dc3344b..1cedbc3d3200 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,6 +3,8 @@ #define __LINUX_GPIO_DRIVER_H #include +#include +#include #include #include #include @@ -609,6 +611,9 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); +DEFINE_FREE(gpio_device_put, struct gpio_device *, + if (IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From cfe102f63308c8c8e01199a682868a64b83f653e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:23 +0200 Subject: gpiolib: provide gpio_device_find() gpiochip_find() is wrong and its kernel doc is misleading as the function doesn't return a reference to the gpio_chip but just a raw pointer. The chip itself is not guaranteed to stay alive, in fact it can be deleted at any point. Also: other than GPIO drivers themselves, nobody else has any business accessing gpio_chip structs. Provide a new gpio_device_find() function that returns a real reference to the opaque gpio_device structure that is guaranteed to stay alive for as long as there are active users of it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1cedbc3d3200..6ad1f1a8ef2e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -608,6 +608,9 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_find(void *data, + int (*match)(struct gpio_chip *gc, void *data)); + struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From d62fcd9f1897d587f8f9db3f77ccae8797a44b5d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:24 +0200 Subject: gpiolib: provide gpio_device_find_by_label() By far the most common way of looking up GPIO devices is using their label. Provide a helpers for that to avoid every user implementing their own matching function. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6ad1f1a8ef2e..24996cba6465 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -610,6 +610,7 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_find_by_label(const char *label); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From 93548f8bbbbf5b62dbd37c8f61a037a06666787b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:25 +0200 Subject: gpiolib: provide gpio_device_get_desc() Getting the GPIO descriptor directly from the gpio_chip struct is dangerous as we don't take the reference to the underlying GPIO device. In order to start working towards removing gpiochip_get_desc(), let's provide a safer variant that works with an existing reference to struct gpio_device. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 24996cba6465..3fdf3f14bb13 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -770,6 +770,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); #ifdef CONFIG_GPIOLIB -- cgit v1.2.3 From 9b418780844c677669ab474f6f29940ef545c954 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:26 +0200 Subject: gpiolib: reluctantly provide gpio_device_get_chip() The process of converting all unauthorized users of struct gpio_chip to using dedicated struct gpio_device function will be long so in the meantime we must provide a way of retrieving the pointer to struct gpio_chip from a GPIO device. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 3fdf3f14bb13..f8ad7f40100c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -773,6 +773,8 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -- cgit v1.2.3 From 8e56b063c86569e51eed1c5681ce6361fa97fc7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Oct 2023 13:17:53 -0400 Subject: netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp In Scenario A and B below, as the delayed INIT_ACK always changes the peer vtag, SCTP ct with the incorrect vtag may cause packet loss. Scenario A: INIT_ACK is delayed until the peer receives its own INIT_ACK 192.168.1.2 > 192.168.1.1: [INIT] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT] [init tag: 1414468151] 192.168.1.2 > 192.168.1.1: [INIT ACK] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT ACK] [init tag: 1650211246] * 192.168.1.2 > 192.168.1.1: [COOKIE ECHO] 192.168.1.1 > 192.168.1.2: [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: [COOKIE ACK] Scenario B: INIT_ACK is delayed until the peer completes its own handshake 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] * This patch fixes it as below: In SCTP_CID_INIT processing: - clear ct->proto.sctp.init[!dir] if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]. (Scenario E) - set ct->proto.sctp.init[dir]. In SCTP_CID_INIT_ACK processing: - drop it if !ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario B, Scenario C) - drop it if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario A) In SCTP_CID_COOKIE_ACK processing: - clear ct->proto.sctp.init[dir] and ct->proto.sctp.init[!dir]. (Scenario D) Also, it's important to allow the ct state to move forward with cookie_echo and cookie_ack from the opposite dir for the collision scenarios. There are also other Scenarios where it should allow the packet through, addressed by the processing above: Scenario C: new CT is created by INIT_ACK. Scenario D: start INIT on the existing ESTABLISHED ct. Scenario E: start INIT after the old collision on the existing ESTABLISHED ct. 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] (both side are stopped, then start new connection again in hours) 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 242308742] Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Xin Long Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_sctp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; -- cgit v1.2.3 From 09361abc346102c505657db4f3ae19ed70e1b703 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 9 Aug 2023 16:00:12 +0800 Subject: dmaengine: Remove unused declaration dma_chan_cleanup() Commit f27c580c3628 ("dmaengine: remove 'bigref' infrastructure") removed the implementation but left declaration in place. Remove it. Signed-off-by: Yue Haibing Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230809080012.22000-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c3656e590213..3df70d6131c8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan) return dev_name(&chan->dev->device); } -void dma_chan_cleanup(struct kref *kref); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed -- cgit v1.2.3 From 4785aa8028536c2be656d22c74ec1995b97056f3 Mon Sep 17 00:00:00 2001 From: Oza Pawandeep Date: Tue, 3 Oct 2023 10:33:33 -0700 Subject: cpuidle, ACPI: Evaluate LPI arch_flags for broadcast timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arm® Functional Fixed Hardware Specification defines LPI states, which provide an architectural context loss flags field that can be used to describe the context that might be lost when an LPI state is entered. - Core context Lost - General purpose registers. - Floating point and SIMD registers. - System registers, include the System register based - generic timer for the core. - Debug register in the core power domain. - PMU registers in the core power domain. - Trace register in the core power domain. - Trace context loss - GICR - GICD Qualcomm's custom CPUs preserves the architectural state, including keeping the power domain for local timers active. when core is power gated, the local timers are sufficient to wake the core up without needing broadcast timer. The patch fixes the evaluation of cpuidle arch_flags, and moves only to broadcast timer if core context lost is defined in ACPI LPI. Fixes: a36a7fecfe60 ("ACPI / processor_idle: Add support for Low Power Idle(LPI) states") Reviewed-by: Sudeep Holla Acked-by: Rafael J. Wysocki Signed-off-by: Oza Pawandeep Link: https://lore.kernel.org/r/20231003173333.2865323-1-quic_poza@quicinc.com Signed-off-by: Will Deacon --- include/linux/acpi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..afd94c9b8b8a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1480,6 +1480,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); -- cgit v1.2.3 From c964c1f5ee96e1460606d44f80a47bdacd8fe568 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:35:59 +0200 Subject: rcu: Assume rcu_report_dead() is always called locally rcu_report_dead() has to be called locally by the CPU that is going to exit the RCU state machine. Passing a cpu argument here is error-prone and leaves the possibility for a racy remote call. Use local access instead. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e5f920ade90..aa351ddcbe8d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -122,7 +122,7 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(unsigned int cpu); +void rcu_report_dead(void); void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC -- cgit v1.2.3 From bc0c3357601e3ff1b006600530079bd246ef0d82 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 23 Aug 2023 19:05:56 +0200 Subject: mm: remove remnants of SPLIT_RSS_COUNTING The feature got retired in f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"), but the patch failed to fully clean it up. Link: https://lkml.kernel.org/r/20230823170556.2281747-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Acked-by: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..7613150acab9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2628,14 +2628,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } -#if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct mm_struct *mm); -#else -static inline void sync_mm_rss(struct mm_struct *mm) -{ -} -#endif - #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int pte_special(pte_t pte) { -- cgit v1.2.3 From 91e79d22be75fec88ae58d274a7c9e49d6215099 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 23 Aug 2023 00:13:14 +0100 Subject: mm: convert DAX lock/unlock page to lock/unlock folio The one caller of DAX lock/unlock page already calls compound_head(), so use page_folio() instead, then use a folio throughout the DAX code to remove uses of page->mapping and page->index. [jane.chu@oracle.com: add comment to mf_generic_kill_procss(), simplify mf_generic_kill_procs:folio initialization] Link: https://lkml.kernel.org/r/20230908222336.186313-1-jane.chu@oracle.com Link: https://lkml.kernel.org/r/20230822231314.349200-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jane Chu Acked-by: Naoya Horiguchi Cc: Dan Williams Cc: Jane Chu Signed-off-by: Andrew Morton --- include/linux/dax.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 22cd9902345d..b463502b16e1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -159,8 +159,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_folio(struct folio *folio); +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page); void dax_unlock_mapping_entry(struct address_space *mapping, @@ -182,14 +182,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline dax_entry_t dax_lock_page(struct page *page) +static inline dax_entry_t dax_lock_folio(struct folio *folio) { - if (IS_DAX(page->mapping->host)) + if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } -static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) +static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } -- cgit v1.2.3 From b1e5a3dee255a11cbdd5a0e814829276bd33a793 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 3 Sep 2023 15:13:23 +0000 Subject: mm/mremap: allow moves within the same VMA for stack moves For the stack move happening in shift_arg_pages(), the move is happening within the same VMA which spans the old and new ranges. In case the aligned address happens to fall within that VMA, allow such moves and don't abort the mremap alignment optimization. In the regular non-stack mremap case, we cannot allow any such moves as will end up destroying some part of the mapping (either the source of the move, or part of the existing mapping). So just avoid it for stack moves. Link: https://lkml.kernel.org/r/20230903151328.2981432-3-joel@joelfernandes.org Signed-off-by: Joel Fernandes (Google) Reviewed-by: Lorenzo Stoakes Cc: Kalesh Singh Cc: "Kirill A. Shutemov" Cc: Liam R. Howlett Cc: Linus Torvalds Cc: Lokesh Gidra Cc: Michal Hocko Cc: Paul E. McKenney Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7613150acab9..21c3d4e8a282 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2480,7 +2480,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks); + bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so -- cgit v1.2.3 From d896073fc767ebb40c11a6a9de71c390757ac64b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 7 Sep 2023 02:29:27 +0000 Subject: mm/damon/core: add more comments for nr_accesses The comment on struct damon_region about nr_accesses field looks not sufficient. Many people actually used to ask what nr_accesses mean. There is more detailed explanation of the mechanism on the comment for struct damon_attrs, but it is also ambiguous, as it doesn't specify the name of the counter for aggregating the access check results. Make those more detailed. Link: https://lkml.kernel.org/r/20230907022929.91361-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ae2664d1d5f1..266f92b34dd2 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -43,6 +43,10 @@ struct damon_addr_range { * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -472,13 +476,14 @@ struct damon_callback { * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or - * not. It aggregates and keeps the access information (number of accesses to - * each region) for @aggr_interval time. DAMON also checks whether the target - * memory regions need update (e.g., by ``mmap()`` calls from the application, - * in case of virtual memory monitoring) and applies the changes for each - * @ops_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_operations and &struct damon_callback for more - * detail. + * not during the last @sample_interval. If such access is found, DAMON + * aggregates the information by increasing &damon_region->nr_accesses for + * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON + * also checks whether the target memory regions need update (e.g., by + * ``mmap()`` calls from the application, in case of virtual memory monitoring) + * and applies the changes for each @ops_update_interval. All time intervals + * are in micro-seconds. Please refer to &struct damon_operations and &struct + * damon_callback for more detail. */ struct damon_attrs { unsigned long sample_interval; -- cgit v1.2.3 From cf0a96bd3ab4d9d8a1c92baf1a822f2ddbca3a34 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 7 Sep 2023 02:29:28 +0000 Subject: mm/damon/core: remove duplicated comment for watermarks-based deactivation The comment for explaining about watermarks-based monitoring part deactivation is duplicated in two paragraphs. Remove one. Link: https://lkml.kernel.org/r/20230907022929.91361-11-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 266f92b34dd2..ab3089de1478 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -317,9 +317,6 @@ struct damos_access_pattern { * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * - * If all schemes that registered to a &struct damon_ctx are inactive, DAMON - * stops monitoring and just repeatedly checks the watermarks. - * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters -- cgit v1.2.3 From 7cd34dd3c9bf1d67ce7d1ab3fe8886c583ae0d9a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 11 Sep 2023 14:21:13 +0300 Subject: efi/unaccepted: do not let /proc/vmcore try to access unaccepted memory Patch series "Do not try to access unaccepted memory", v2. Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Plug a few gaps where RAM is exposed without checking if it is unaccepted memory. This patch (of 2): Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not let /proc/vmcore try to access unaccepted memory because it can cause the guest to fail. For /proc/vmcore, which is read-only, this means a read or mmap of unaccepted memory will return zeros. Link: https://lkml.kernel.org/r/20230911112114.91323-1-adrian.hunter@intel.com Link: https://lkml.kernel.org/r/20230911112114.91323-2-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Dave Young Cc: Kirill A. Shutemov Cc: Lorenzo Stoakes Cc: Mike Rapoport Cc: Tom Lendacky Cc: Vivek Goyal Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 21c3d4e8a282..31dc25d3f6b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4054,4 +4054,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) #endif +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 3ee0aa9f06756e959633ffda37856c6741d948ed Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:25:14 +0800 Subject: mm: move some shrinker-related function declarations to mm/internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "cleanups for lockless slab shrink", v4. This series is some cleanups for lockless slab shrink. This patch (of 4): The following functions are only used inside the mm subsystem, so it's better to move their declarations to the mm/internal.h file. 1. shrinker_debugfs_add() 2. shrinker_debugfs_detach() 3. shrinker_debugfs_remove() Link: https://lkml.kernel.org/r/20230911092517.64141-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911092517.64141-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Christian Brauner Cc: Christian König Cc: Chuck Lever Cc: Daniel Vetter Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Daniel Vetter Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Coly Li Cc: Dai Ngo Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 224293b2dd06..8dc15aa37410 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -106,28 +106,9 @@ extern void free_prealloced_shrinker(struct shrinker *shrinker); extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG -extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id); -extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id); extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ -static inline int shrinker_debugfs_add(struct shrinker *shrinker) -{ - return 0; -} -static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id) -{ - *debugfs_id = -1; - return NULL; -} -static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id) -{ -} static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { -- cgit v1.2.3 From 0b2f5ea1aa39c0ed34bdadb53faf519e3d84ac4a Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:25:17 +0800 Subject: drm/ttm: introduce pool_shrink_rwsem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, synchronize_shrinkers() is only used by TTM pool. It only requires that no shrinkers run in parallel. After we use RCU+refcount method to implement the lockless slab shrink, we can not use shrinker_rwsem or synchronize_rcu() to guarantee that all shrinker invocations have seen an update before freeing memory. So we introduce a new pool_shrink_rwsem to implement a private ttm_pool_synchronize_shrinkers(), so as to achieve the same purpose. Link: https://lkml.kernel.org/r/20230911092517.64141-5-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Christian König Acked-by: Daniel Vetter Cc: Christian Brauner Cc: Chuck Lever Cc: Daniel Vetter Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Coly Li Cc: Dai Ngo Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 8dc15aa37410..6b5843c3b827 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -103,7 +103,6 @@ extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, const char *fmt, ...); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, -- cgit v1.2.3 From c42d50aefd17a6bad3ed617769edbbb579137545 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:00 +0800 Subject: mm: shrinker: add infrastructure for dynamically allocating shrinker Patch series "use refcount+RCU method to implement lockless slab shrink", v6. 1. Background ============= We used to implement the lockless slab shrink with SRCU [1], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [2], so we reverted it [3]. This patch series aims to re-implement the lockless slab shrink using the refcount+RCU method proposed by Dave Chinner [4]. [1]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [2]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [3]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [4]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ 2. Implementation ================= Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). This patchset adds the following new APIs for dynamically allocating shrinker, and add a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() 2. shrinker_register() 3. shrinker_free() In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, this patchset uses the above APIs to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using uncoverted shrinkers in the new setup. ``` Then we free the shrinker by calling call_rcu(), and use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker::refcount mechanism ensures that the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker() to wait RCU read-side critical section. PATCH 1: introduce new APIs PATCH 2~38: convert all shrinnkers to use new APIs PATCH 39: remove old APIs PATCH 40~41: some cleanups and preparations PATCH 42-43: implement the lockless slab shrink PATCH 44~45: convert shrinker_rwsem to mutex 3. Testing ========== 3.1 slab shrink stress test --------------------------- We can reproduce the down_read_trylock() hotspot through the following script: ``` DIR="/root/shrinker/memcg/mnt" do_create() { mkdir -p /sys/fs/cgroup/memory/test echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes for i in `seq 0 $1`; do mkdir -p /sys/fs/cgroup/memory/test/$i; echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; mkdir -p $DIR/$i; done } do_mount() { for i in `seq $1 $2`; do mount -t tmpfs $i $DIR/$i; done } do_touch() { for i in `seq $1 $2`; do echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 & done } case "$1" in touch) do_touch $2 $3 ;; test) do_create 4000 do_mount 0 4000 do_touch 0 3000 ;; *) exit 1 ;; esac ``` Save the above script, then run test and touch commands. Then we can use the following perf command to view hotspots: perf top -U -F 999 1) Before applying this patchset: 33.15% [kernel] [k] down_read_trylock 25.38% [kernel] [k] shrink_slab 21.75% [kernel] [k] up_read 4.45% [kernel] [k] _find_next_bit 2.27% [kernel] [k] do_shrink_slab 1.80% [kernel] [k] intel_idle_irq 1.79% [kernel] [k] shrink_lruvec 0.67% [kernel] [k] xas_descend 0.41% [kernel] [k] mem_cgroup_iter 0.40% [kernel] [k] shrink_node 0.38% [kernel] [k] list_lru_count_one 2) After applying this patchset: 64.56% [kernel] [k] shrink_slab 12.18% [kernel] [k] do_shrink_slab 3.30% [kernel] [k] __rcu_read_unlock 2.61% [kernel] [k] shrink_lruvec 2.49% [kernel] [k] __rcu_read_lock 1.93% [kernel] [k] intel_idle_irq 0.89% [kernel] [k] shrink_node 0.81% [kernel] [k] mem_cgroup_iter 0.77% [kernel] [k] mem_cgroup_calculate_protection 0.66% [kernel] [k] list_lru_count_one We can see that the first perf hotspot becomes shrink_slab, which is what we expect. 3.2 registration and unregistration stress test ----------------------------------------------- Run the command below to test: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. This patch (of 45): Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). So this commit adds the following new APIs for dynamically allocating shrinker, and add a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() Used to allocate shrinker instance itself and related memory, it will return a pointer to the shrinker instance on success and NULL on failure. 2. shrinker_register() Used to register the shrinker instance, which is same as the current register_shrinker_prepared(). 3. shrinker_free() Used to unregister (if needed) and free the shrinker instance. In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, subsequent submissions will use the above API to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using unconverted shrinkers in the new setup. ``` [zhengqi.arch@bytedance.com: mm: shrinker: some cleanup] Link: https://lkml.kernel.org/r/20230919024607.65463-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Christian Brauner Cc: Chuck Lever Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Koenig Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 6b5843c3b827..f4a5249f00b2 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -70,6 +70,8 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -86,15 +88,22 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) + +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...); -- cgit v1.2.3 From 4b403dfa8ea8dfbc9d317b25a0433c1ac6765f7f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:30 +0800 Subject: jbd2,ext4: dynamically allocate the jbd2-journal shrinker In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the jbd2-journal shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct journal_s. Link: https://lkml.kernel.org/r/20230911094444.68966-32-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Jan Kara Cc: "Theodore Ts'o" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52772c826c86..6dcbb4eb80fb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -886,7 +886,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: -- cgit v1.2.3 From 1720f5dd8d3af34d6023fb9e8c35e5e60e8b6643 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:37 +0800 Subject: fs: super: dynamically allocate the s_shrink In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the s_shrink, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct super_block. Link: https://lkml.kernel.org/r/20230911094444.68966-39-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: Alexander Viro Cc: Christian Brauner Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..fd539c9fef8e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1265,7 +1265,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; -- cgit v1.2.3 From f2383e01507eeee8a1c1283d61a117a97d6c4ebe Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:38 +0800 Subject: mm: shrinker: remove old APIs Now no users are using the old APIs, just remove them. Link: https://lkml.kernel.org/r/20230911094444.68966-40-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index f4a5249f00b2..1d3899f37229 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -105,14 +105,6 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); - #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); -- cgit v1.2.3 From 307bececcd1205bcb67a3c0d53a69db237ccc9d4 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:39 +0800 Subject: mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred} Currently, we maintain two linear arrays per node per memcg, which are shrinker_info::map and shrinker_info::nr_deferred. And we need to resize them when the shrinker_nr_max is exceeded, that is, allocate a new array, and then copy the old array to the new array, and finally free the old array by RCU. For shrinker_info::map, we do set_bit() under the RCU lock, so we may set the value into the old map which is about to be freed. This may cause the value set to be lost. The current solution is not to copy the old map when resizing, but to set all the corresponding bits in the new map to 1. This solves the data loss problem, but bring the overhead of more pointless loops while doing memcg slab shrink. For shrinker_info::nr_deferred, we will only modify it under the read lock of shrinker_rwsem, so it will not run concurrently with the resizing. But after we make memcg slab shrink lockless, there will be the same data loss problem as shrinker_info::map, and we can't work around it like the map. For such resizable arrays, the most straightforward idea is to change it to xarray, like we did for list_lru [1]. We need to do xa_store() in the list_lru_add()-->set_shrinker_bit(), but this will cause memory allocation, and the list_lru_add() doesn't accept failure. A possible solution is to pre-allocate, but the location of pre-allocation is not well determined (such as deferred_split_shrinker case). Therefore, this commit chooses to introduce the following secondary array for shrinker_info::{map, nr_deferred}: +---------------+--------+--------+-----+ | shrinker_info | unit 0 | unit 1 | ... | (secondary array) +---------------+--------+--------+-----+ | v +---------------+-----+ | nr_deferred[] | map | (leaf array) +---------------+-----+ (shrinker_info_unit) The leaf array is never freed unless the memcg is destroyed. The secondary array will be resized every time the shrinker id exceeds shrinker_nr_max. So the shrinker_info_unit can be indexed from both the old and the new shrinker_info->unit[x]. Then even if we get the old secondary array under the RCU lock, the found map and nr_deferred are also true, so the updated nr_deferred and map will not be lost. [1]. https://lore.kernel.org/all/20220228122126.37293-13-songmuchun@bytedance.com/ [zhengqi.arch@bytedance.com: unlock the &shrinker_rwsem before the call to free_shrinker_info()] Link: https://lkml.kernel.org/r/20230928141517.12164-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-41-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 12 +----------- include/linux/shrinker.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c..031102ac9311 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include #include #include +#include struct mem_cgroup; struct obj_cgroup; @@ -88,17 +89,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1d3899f37229..ba5ac82b5dbd 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -5,6 +5,23 @@ #include #include +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. -- cgit v1.2.3 From ca1d36b823944f24b5755311e95883fb5fdb807b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:41 +0800 Subject: mm: shrinker: make global slab shrink lockless The shrinker_rwsem is a global read-write lock in shrinkers subsystem, which protects most operations such as slab shrink, registration and unregistration of shrinkers, etc. This can easily cause problems in the following cases. 1) When the memory pressure is high and there are many filesystems mounted or unmounted at the same time, slab shrink will be affected (down_read_trylock() failed). Such as the real workload mentioned by Kirill Tkhai: ``` One of the real workloads from my experience is start of an overcommitted node containing many starting containers after node crash (or many resuming containers after reboot for kernel update). In these cases memory pressure is huge, and the node goes round in long reclaim. ``` 2) If a shrinker is blocked (such as the case mentioned in [1]) and a writer comes in (such as mount a fs), then this writer will be blocked and cause all subsequent shrinker-related operations to be blocked. Even if there is no competitor when shrinking slab, there may still be a problem. The down_read_trylock() may become a perf hotspot with frequent calls to shrink_slab(). Because of the poor multicore scalability of atomic operations, this can lead to a significant drop in IPC (instructions per cycle). We used to implement the lockless slab shrink with SRCU [2], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [3], so we reverted it [4]. This commit uses the refcount+RCU method [5] proposed by Dave Chinner to re-implement the lockless global slab shrink. The memcg slab shrink is handled in the subsequent patch. For now, all shrinker instances are converted to dynamically allocated and will be freed by call_rcu(). So we can use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker(). The following are the test results: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. [1]. https://lore.kernel.org/lkml/20191129214541.3110-1-ptikhomirov@virtuozzo.com/ [2]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [3]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [4]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [5]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ Link: https://lkml.kernel.org/r/20230911094444.68966-43-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ba5ac82b5dbd..e4f93120e0ab 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,8 @@ #include #include +#include +#include #define SHRINKER_UNIT_BITS BITS_PER_LONG @@ -87,6 +89,17 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + void *private_data; /* These are for internal use */ @@ -122,6 +135,17 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); +static inline bool shrinker_try_get(struct shrinker *shrinker) +{ + return refcount_inc_not_zero(&shrinker->refcount); +} + +static inline void shrinker_put(struct shrinker *shrinker) +{ + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); +} + #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); -- cgit v1.2.3 From 09c550508a4b8f7844b197cc16877dd0f7c42d8f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Sep 2023 14:51:13 +0200 Subject: mm/rmap: pass folio to hugepage_add_anon_rmap() Let's pass a folio; we are always mapping the entire thing. Link: https://lkml.kernel.org/r/20230913125113.313322-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 51cc21ebb568..d22f4d21a11c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -203,7 +203,7 @@ void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr, void page_remove_rmap(struct page *, struct vm_area_struct *, bool compound); -void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, +void hugepage_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); -- cgit v1.2.3 From 73eab3ca481e5be0f1fd8140365d604482f84ee1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 13 Sep 2023 17:51:27 +0800 Subject: mm: migrate: convert migrate_misplaced_page() to migrate_misplaced_folio() At present, numa balance only support base page and PMD-mapped THP, but we will expand to support to migrate large folio/pte-mapped THP in the future, it is better to make migrate_misplaced_page() to take a folio instead of a page, and rename it to migrate_misplaced_folio(), it is a preparation, also this remove several compound_head() calls. Link: https://lkml.kernel.org/r/20230913095131.2426871-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Signed-off-by: Andrew Morton --- include/linux/migrate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561..2ce13e8a309b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ -- cgit v1.2.3 From 2a41815784e029cbef571511384f00fa40f2a82e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:04 +0100 Subject: buffer: pass GFP flags to folio_alloc_buffers() Patch series "Add and use bdev_getblk()", v2. This patch series fixes a bug reported by Hui Zhu; see proposed patches v1 and v2: https://lore.kernel.org/linux-fsdevel/20230811035705.3296-1-teawaterz@linux.alibaba.com/ https://lore.kernel.org/linux-fsdevel/20230811071519.1094-1-teawaterz@linux.alibaba.com/ I decided to go in a rather different direction for this fix, and fix a related problem at the same time. I don't think there's any urgency to rush this into Linus' tree, nor have I marked it for stable. Reasonable people may disagree. This patch (of 8): Instead of creating entirely new flags, inherit them from grow_dev_page(). The other callers create the same flags that this function used to create. Link: https://lkml.kernel.org/r/20230914150011.843330-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230914150011.843330-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 44e9de51eedf..67d94d2be475 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -198,7 +198,7 @@ void touch_buffer(struct buffer_head *bh); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry); + gfp_t gfp); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); void create_empty_buffers(struct page *, unsigned long, -- cgit v1.2.3 From 3ed65f04aac4d1cd025f30ee3fac174bcbf2b018 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:05 +0100 Subject: buffer: hoist GFP flags from grow_dev_page() to __getblk_gfp() grow_dev_page() is only called by grow_buffers(). grow_buffers() is only called by __getblk_slow() and __getblk_slow() is only called from __getblk_gfp(), so it is safe to move the GFP flags setting all the way up. With that done, add a new bdev_getblk() entry point that leaves the GFP flags the way the caller specified them. [willy@infradead.org: fix grow_dev_page() error handling] Link: https://lkml.kernel.org/r/ZRREEIwqiy5DijKB@casper.infradead.org Link: https://lkml.kernel.org/r/20230914150011.843330-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Cc: Dan Carpenter Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 67d94d2be475..7825bb3d63a7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,6 +227,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); -- cgit v1.2.3 From c645e65c0675dd4df7ee68b995154dc1c1e7ce3b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:08 +0100 Subject: buffer: convert getblk_unmovable() and __getblk() to use bdev_getblk() Move these two functions up in the file for the benefit of the next patch, and pass in all of the GFP flags to use instead of the partial GFP flags used by __getblk_gfp(). Link: https://lkml.kernel.org/r/20230914150011.843330-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7825bb3d63a7..9a3ca5f6d63d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -340,6 +340,28 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_MOVABLE | __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { @@ -387,20 +409,6 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } -static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, 0); -} - -static inline struct buffer_head *__getblk(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); -} - static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) { if (!buffer_uptodate(bh) && trylock_buffer(bh)) { -- cgit v1.2.3 From 4b9c8b1919323f7f359376ca31a4c721cb2b3acf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:09 +0100 Subject: buffer: convert sb_getblk() to call __getblk() Now that __getblk() is in the right place in the file, it is trivial to call it from sb_getblk(). Link: https://lkml.kernel.org/r/20230914150011.843330-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9a3ca5f6d63d..b294e2cccbae 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -362,13 +362,12 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, return bdev_getblk(bdev, block, size, gfp); } -static inline struct buffer_head * -sb_getblk(struct super_block *sb, sector_t block) +static inline struct buffer_head *sb_getblk(struct super_block *sb, + sector_t block) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); + return __getblk(sb->s_bdev, block, sb->s_blocksize); } - static inline struct buffer_head * sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) { -- cgit v1.2.3 From 8a83ac54940d27b8f56d766e1cb270d150fedd50 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:10 +0100 Subject: ext4: call bdev_getblk() from sb_getblk_gfp() Most of the callers of sb_getblk_gfp() already assumed that they were passing the entire GFP flags to use. Fix up the two callers that didn't, and remove the __GFP_NOFAIL from them since they both appear to correctly handle failure. Link: https://lkml.kernel.org/r/20230914150011.843330-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b294e2cccbae..22f13eece719 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -368,10 +368,10 @@ static inline struct buffer_head *sb_getblk(struct super_block *sb, return __getblk(sb->s_bdev, block, sb->s_blocksize); } -static inline struct buffer_head * -sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, + sector_t block, gfp_t gfp) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); + return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * -- cgit v1.2.3 From 93b13ecaa713e1fcbf23b7483eec065d300c5ad8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:11 +0100 Subject: buffer: remove __getblk_gfp() Inline it into __bread_gfp(). Link: https://lkml.kernel.org/r/20230914150011.843330-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 22f13eece719..3dc4720e4773 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -229,8 +229,6 @@ struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); -struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -- cgit v1.2.3 From 3dfbb555c98ac55b9d911f9af0e35014b445fb41 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 14 Sep 2023 15:16:39 +0200 Subject: mm, vmscan: remove ISOLATE_UNMAPPED This isolate_mode_t flag is effectively unused since 89f6c88a6ab4 ("mm: __isolate_lru_page_prepare() in isolate_migratepages_block()") as sc->may_unmap is now checked directly (and only node_reclaim has a mode that sets it to 0). The last remaining place is mm_vmscan_lru_isolate tracepoint for the isolate_mode parameter. That one was mainly used to indicate the active/inactive mode, which the trace-vmscan-postprocess.pl script consumed, but that got silently broken. After fixing the script by the previous patch, it does not need the isolate_mode anymore. So just remove the parameter and with that the whole ISOLATE_UNMAPPED flag. Link: https://lkml.kernel.org/r/20230914131637.12204-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Hugh Dickins Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4106fbc5b4b3..486587fcd27f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -639,8 +639,6 @@ struct lruvec { #endif }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ -- cgit v1.2.3 From 4472edf63d6630e6cf65e205b4fc8c3c94d0afe5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 14 Sep 2023 02:15:23 +0000 Subject: mm/damon/core: use number of passed access sampling as a timer DAMON sleeps for sampling interval after each sampling, and check if the aggregation interval and the ops update interval have passed using ktime_get_coarse_ts64() and baseline timestamps for the intervals. That design is for making the operations occur at deterministic timing regardless of the time that spend for each work. However, it turned out it is not that useful, and incur not-that-intuitive results. After all, timer functions, and especially sleep functions that DAMON uses to wait for specific timing, are not necessarily strictly accurate. It is legal design, so no problem. However, depending on such inaccuracies, the nr_accesses can be larger than aggregation interval divided by sampling interval. For example, with the default setting (5 ms sampling interval and 100 ms aggregation interval) we frequently show regions having nr_accesses larger than 20. Also, if the execution of a DAMOS scheme takes a long time, next aggregation could happen before enough number of samples are collected. This is not what usual users would intuitively expect. Since access check sampling is the smallest unit work of DAMON, using the number of passed sampling intervals as the DAMON-internal timer can easily avoid these problems. That is, convert aggregation and ops update intervals to numbers of sampling intervals that need to be passed before those operations be executed, count the number of passed sampling intervals, and invoke the operations as soon as the specific amount of sampling intervals passed. Make the change. Note that this could make a behavioral change to settings that using intervals that not aligned by the sampling interval. For example, if the sampling interval is 5 ms and the aggregation interval is 12 ms, DAMON effectively uses 15 ms as its aggregation interval, because it checks whether the aggregation interval after sleeping the sampling interval. This change will make DAMON to effectively use 10 ms as aggregation interval, since it uses 'aggregation interval / sampling interval * sampling interval' as the effective aggregation interval, and we don't use floating point types. Usual users would have used aligned intervals, so this behavioral change is not expected to make any meaningful impact, so just make this change. Link: https://lkml.kernel.org/r/20230914021523.60649-1-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ab3089de1478..9a32b8fd0bd3 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -524,8 +524,18 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; /* public: */ struct task_struct *kdamond; -- cgit v1.2.3 From 78fbfb155d204428119310d1b9df665ab88da6e8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:44 +0000 Subject: mm/damon/core: define and use a dedicated function for region access rate update Patch series "mm/damon: provide pseudo-moving sum based access rate". DAMON checks the access to each region for every sampling interval, increase the access rate counter of the region, namely nr_accesses, if the access was made. For every aggregation interval, the counter is reset. The counter is exposed to users to be used as a metric showing the relative access rate (frequency) of each region. In other words, DAMON provides access rate of each region in every aggregation interval. The aggregation avoids temporal access pattern changes making things confusing. However, this also makes a few DAMON-related operations to unnecessarily need to be aligned to the aggregation interval. This can restrict the flexibility of DAMON applications, especially when the aggregation interval is huge. To provide the monitoring results in finer-grained timing while keeping handling of temporal access pattern change, this patchset implements a pseudo-moving sum based access rate metric. It is pseudo-moving sum because strict moving sum implementation would need to keep all values for last time window, and that could incur high overhead of there could be arbitrary number of values in a time window. Especially in case of the nr_accesses, since the sampling interval and aggregation interval can arbitrarily set and the past values should be maintained for every region, it could be risky. The pseudo-moving sum assumes there were no temporal access pattern change in last discrete time window to remove the needs for keeping the list of the last time window values. As a result, it beocmes not strict moving sum implementation, but provides a reasonable accuracy. Also, it keeps an important property of the moving sum. That is, the moving sum becomes same to discrete-window based sum at the time that aligns to the time window. This means using the pseudo moving sum based nr_accesses makes no change to users who shows the value for every aggregation interval. Patches Sequence ---------------- The sequence of the patches is as follows. The first four patches are for preparation of the change. The first two (patches 1 and 2) implements a helper function for nr_accesses update and eliminate corner case that skips use of the function, respectively. Following two (patches 3 and 4) respectively implement the pseudo-moving sum function and its simple unit test case. Two patches for making DAMON to use the pseudo-moving sum follow. The fifthe one (patch 5) introduces a new field for representing the pseudo-moving sum-based access rate of each region, and the sixth one makes the new representation to actually updated with the pseudo-moving sum function. Last two patches (patches 7 and 8) makes followup fixes for skipping unnecessary updates and marking the moving sum function as static, respectively. This patch (of 8): Each DAMON operarions set is updating nr_accesses field of each damon_region for each of their access check results, from the check_accesses() callback. Directly accessing the field could make things complex to manage and change in future. Define and use a dedicated function for the purpose. Link: https://lkml.kernel.org/r/20230915025251.72816-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230915025251.72816-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 9a32b8fd0bd3..17c504d236b9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -45,7 +45,9 @@ struct damon_addr_range { * * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be * increased for every &damon_attrs->sample_interval if an access to the region - * during the last sampling interval is found. + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two @@ -620,6 +622,7 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); -- cgit v1.2.3 From d2c062ade07ffd206dd16bf085f02abc59651309 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:46 +0000 Subject: mm/damon/core: implement a pseudo-moving sum function For values that continuously change, moving average or sum are good ways to provide fast updates while handling temporal and errorneous variability of the value. For example, the access rate counter (nr_accesses) is calculated as a sum of the number of positive sampled access check results that collected during a discrete time window (aggregation interval), and hence it handles temporal and errorneous access check results, but provides the update only for every aggregation interval. Using a moving sum method for that could allow providing the value for every sampling interval. That could be useful for getting monitoring results snapshot or running DAMOS in fine-grained timing. However, supporting the moving sum for cases that number of samples in the time window is arbirary could impose high overhead, since the number of past values that it needs to keep could be too high. The nr_accesses would also be one of the cases. To mitigate the overhead, implement a pseudo-moving sum function that only provides an estimated pseudo-moving sum. It assumes there was no error in last discrete time window and subtract constant portion of last discrete time window sum. Note that the function is not strictly implementing the moving sum, but it keeps a property of moving sum, which makes the value same to the dsicrete-window based sum for each time window-aligned timing. Hence, people collecting the value in the old timings would show no difference. Link: https://lkml.kernel.org/r/20230915025251.72816-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 17c504d236b9..487a545a11b4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -622,6 +622,8 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value); void damon_update_region_access_rate(struct damon_region *r, bool accessed); struct damos_filter *damos_new_filter(enum damos_filter_type type, -- cgit v1.2.3 From 80333828ea7728ebe85d079bb5c1467eb9fc6c8c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:48 +0000 Subject: mm/damon/core: introduce nr_accesses_bp Add yet another representation of the access rate of each region, namely nr_accesses_bp. It is just same to the nr_accesses but represents the value in basis point (1 in 10,000), and updated at once in every aggregation interval. That is, moving_accesses_bp is just nr_accesses * 10000. This may seems useless at the moment. However, it will be useful for representing less than one nr_accesses value that will be needed to make moving sum-based nr_accesses. Link: https://lkml.kernel.org/r/20230915025251.72816-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 487a545a11b4..15f24b23c9a0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,6 +40,7 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%). * @list: List head for siblings. * @age: Age of this region. * @@ -49,6 +50,9 @@ struct damon_addr_range { * not be done with direct access but with the helper function, * damon_update_region_access_rate(). * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated every aggregation interval. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -58,6 +62,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; -- cgit v1.2.3 From ace30fb21af5f1be1605db72c16040b95b1557ef Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:49 +0000 Subject: mm/damon/core: use pseudo-moving sum for nr_accesses_bp Let nr_accesses_bp be calculated as a pseudo-moving sum that updated for every sampling interval, using damon_moving_sum(). This is assumed to be useful for cases that the aggregation interval is set quite huge, but the monivoting results need to be collected earlier than next aggregation interval is passed. Link: https://lkml.kernel.org/r/20230915025251.72816-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 15f24b23c9a0..0fe13482df63 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,7 +40,8 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. - * @nr_accesses_bp: @nr_accesses in basis point (0.01%). + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * @@ -51,7 +52,11 @@ struct damon_addr_range { * damon_update_region_access_rate(). * * @nr_accesses_bp is another representation of @nr_accesses in basis point - * (1 in 10,000) that updated every aggregation interval. + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two @@ -629,7 +634,8 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, unsigned int len_window, unsigned int new_value); -void damon_update_region_access_rate(struct damon_region *r, bool accessed); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); -- cgit v1.2.3 From 863803a7948c8e33e6a7b002017747ca83ecfd63 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:51 +0000 Subject: mm/damon/core: mark damon_moving_sum() as a static function The function is used by only mm/damon/core.c. Mark it as a static function. Link: https://lkml.kernel.org/r/20230915025251.72816-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 0fe13482df63..491fdd3e4c76 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -632,8 +632,6 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); -unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, - unsigned int len_window, unsigned int new_value); void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); -- cgit v1.2.3 From 77e6c43e137c130138c3fbadc847351a83c4befe Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 13 Sep 2023 11:54:00 +0100 Subject: memblock: introduce MEMBLOCK_RSRV_NOINIT flag For reserved memory regions marked with this flag, reserve_bootmem_region is not called during memmap_init_reserved_pages. This can be used to avoid struct page initialization for regions which won't need them, for e.g. hugepages with Hugepage Vmemmap Optimization enabled. Link: https://lkml.kernel.org/r/20230913105401.519709-4-usama.arif@bytedance.com Signed-off-by: Usama Arif Acked-by: Muchun Song Reviewed-by: Mike Rapoport (IBM) Cc: Fam Zheng Cc: Mike Kravetz Cc: Punit Agrawal Signed-off-by: Andrew Morton --- include/linux/memblock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1c1072e3ca06..ae3bde302f70 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. + * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are + * not initialized (only for reserved regions). */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -47,6 +49,7 @@ enum memblock_flags { MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ + MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ }; /** @@ -125,6 +128,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); void memblock_free(void *ptr, size_t size); @@ -259,6 +263,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_reserved_noinit(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_RSRV_NOINIT; +} + static inline bool memblock_is_driver_managed(struct memblock_region *m) { return m->flags & MEMBLOCK_DRIVER_MANAGED; -- cgit v1.2.3 From 42f994b71404b17abcd6b170de7a6aa95ffe5d4a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 16 Sep 2023 02:09:40 +0000 Subject: mm/damon/core: implement scheme-specific apply interval DAMON-based operation schemes are applied for every aggregation interval. That was mainly because schemes were using nr_accesses, which be complete to be used for every aggregation interval. However, the schemes are now using nr_accesses_bp, which is updated for each sampling interval in a way that reasonable to be used. Therefore, there is no reason to apply schemes for each aggregation interval. The unnecessary alignment with aggregation interval was also making some use cases of DAMOS tricky. Quotas setting under long aggregation interval is one such example. Suppose the aggregation interval is ten seconds, and there is a scheme having CPU quota 100ms per 1s. The scheme will actually uses 100ms per ten seconds, since it cannobe be applied before next aggregation interval. The feature is working as intended, but the results might not that intuitive for some users. This could be fixed by updating the quota to 1s per 10s. But, in the case, the CPU usage of DAMOS could look like spikes, and would actually make a bad effect to other CPU-sensitive workloads. Implement a dedicated timing interval for each DAMON-based operation scheme, namely apply_interval. The interval will be sampling interval aligned, and each scheme will be applied for its apply_interval. The interval is set to 0 by default, and it means the scheme should use the aggregation interval instead. This avoids old users getting any behavioral difference. Link: https://lkml.kernel.org/r/20230916020945.47296-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 491fdd3e4c76..27b995c22497 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -314,16 +314,19 @@ struct damos_access_pattern { * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. + * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * - * For each aggregation interval, DAMON finds regions which fit in the + * For each @apply_interval_us, DAMON finds regions which fit in the * &pattern and applies &action to those. To avoid consuming too much * CPU time or IO resources for the &action, "a is used. * + * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. + * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the * monitoring context are inactive, DAMON stops monitoring either, and just @@ -340,6 +343,14 @@ struct damos_access_pattern { struct damos { struct damos_access_pattern pattern; enum damos_action action; + unsigned long apply_interval_us; +/* private: internal use only */ + /* + * number of sample intervals that should be passed before applying + * @action + */ + unsigned long next_apply_sis; +/* public: */ struct damos_quota quota; struct damos_watermarks wmarks; struct list_head filters; @@ -641,7 +652,9 @@ void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); -- cgit v1.2.3 From a8306f2d4dcea03538c70c26d2948483f70254ff Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 31 Aug 2023 09:33:40 -0700 Subject: compiler.h: unify __UNIQUE_ID commit 6f33d58794ef ("__UNIQUE_ID()") added a fallback definition of __UNIQUE_ID because gcc 4.2 and older did not support __COUNTER__. Also, this commit is effectively a revert of commit b41c29b0527c ("Kbuild: provide a __UNIQUE_ID for clang") which mentions clang 2.6+ supporting __COUNTER__. Documentation/process/changes.rst currently lists the minimum supported version of these compilers as: - gcc: 5.1 - clang: 11.0.0 It should be safe to say that __COUNTER__ is well supported by this point. Link: https://lkml.kernel.org/r/20230831-unique_id-v1-1-28bacd18eb1d@google.com Signed-off-by: Nick Desaulniers Cc: Arnd Bergmann Cc: Jan Beulich Cc: Luc Van Oostenryck Cc: Michal rarek Cc: Nathan Chancellor Cc: Paul Russel Cc: Tom Rix Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 5 ----- include/linux/compiler-gcc.h | 2 -- include/linux/compiler.h | 5 +---- 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..ddab1ef22bee 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -14,11 +14,6 @@ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..2ceba3fe4ec1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -39,8 +39,6 @@ #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..174099fdc485 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -177,10 +177,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. */ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races -- cgit v1.2.3 From 33a9813825710fdc2b980d566ee391fd093a36c6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Aug 2023 16:31:42 +0200 Subject: introduce __next_thread(), fix next_tid() vs exec() race Patch series "introduce __next_thread(), change next_thread()". After commit dce8f8ed1de1 ("document while_each_thread(), change first_tid() to use for_each_thread()") + this series 1. We have only one lockless user of next_thread(), task_group_seq_get_next(). I think it should be changed too. 2. We have only one user of task_struct->thread_group, thread_group_empty(). The next patches will change thread_group_empty() and kill ->thread_group. This patch (of 2): next_tid(start) does: rcu_read_lock(); if (pid_alive(start)) { pos = next_thread(start); if (thread_group_leader(pos)) pos = NULL; else get_task_struct(pos); it should return pos = NULL when next_thread() wraps to the 1st thread in the thread group, group leader, and the thread_group_leader() check tries to detect this case. But this can race with exec. To simplify, suppose we have a main thread M and a single sub-thread T, next_tid(T) should return NULL. Now suppose that T execs. If next_tid(T) is called after T changes the leadership and before it does release_task() which removes the old leader from list, then next_thread() returns M and thread_group_leader(M) = F. Lockless use of next_thread() should be avoided. After this change only task_group_seq_get_next() does this, and I believe it should be changed as well. Link: https://lkml.kernel.org/r/20230824143112.GA31208@redhat.com Link: https://lkml.kernel.org/r/20230824143142.GA31222@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched/signal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..7fb34b8cda54 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -715,6 +715,17 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) +{ + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry_rcu(p->thread_group.next, -- cgit v1.2.3 From d639cf4abb4d171ab2456904da5668c42b5c1937 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Aug 2023 16:32:01 +0200 Subject: change next_thread() to use __next_thread() ?: group_leader This relies on fact that group leader is always the 1st entry in the signal->thread_head list. With or without this change, if the lockless next_thread(last_thread) races with exec it can return the old or the new leader. We are almost ready to kill task->thread_group, after this change its only user is thread_group_empty(). Link: https://lkml.kernel.org/r/20230824143201.GB31222@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched/signal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7fb34b8cda54..cffc882d367f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -726,10 +726,9 @@ static inline struct task_struct *__next_thread(struct task_struct *p) thread_node); } -static inline struct task_struct *next_thread(const struct task_struct *p) +static inline struct task_struct *next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) -- cgit v1.2.3 From e34a35ee1f52312af130b5ebd42fa28313fc6149 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 26 Aug 2023 13:14:06 +0200 Subject: change thread_group_empty() to use task_struct->thread_node Patch series "kill task_struct->thread_group". This patch (of 2): It could use list_is_singular() but this way it is cheaper. Plus the thread_group_leader() check makes it clear that thread_group_empty() can only return true if p is a group leader. This was not immediately obvious before this patch. task_struct->thread_group no longer has users, it can die. Link: https://lkml.kernel.org/r/20230826111200.GA22982@redhat.com Link: https://lkml.kernel.org/r/20230826111406.GA23238@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched/signal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cffc882d367f..d7fa3ca2fa53 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -733,7 +733,8 @@ static inline struct task_struct *next_thread(struct task_struct *p) static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ -- cgit v1.2.3 From 8e1f385104ac044f1552686ad6e1cbc71cc05a30 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 26 Aug 2023 13:14:09 +0200 Subject: kill task_struct->thread_group The last user was removed by the previous patch. Link: https://lkml.kernel.org/r/20230826111409.GA23243@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..6d1341b1673f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1002,7 +1002,6 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; -- cgit v1.2.3 From 9cba82bba500e3ce875381350f289cfb3aa633ba Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 5 Sep 2023 02:48:33 +0000 Subject: seq_file: add helper macro to define attribute for rw file Patch series "Add helper macro DEFINE_SHOW_STORE_ATTRIBUTE() at seq_file.c", v6. We already own DEFINE_SHOW_ATTRIBUTE() helper macro for defining attribute for read-only file, but we found many of drivers also want a helper macro for read-write file too. So we add this helper macro to reduce duplicated code. This patch (of 3): We already own DEFINE_SHOW_ATTRIBUTE() helper macro for defining attribute for read-only file, but many of drivers want a helper macro for read-write file too. So we add DEFINE_SHOW_STORE_ATTRIBUTE() helper to reduce duplicated code. Link: https://lkml.kernel.org/r/20230905024835.43219-1-yangxingui@huawei.com Link: https://lkml.kernel.org/r/20230905024835.43219-2-yangxingui@huawei.com Signed-off-by: Luo Jiaxing Co-developed-by: Xingui Yang Signed-off-by: Xingui Yang Reviewed-by: Andy Shevchenko Cc: Al Viro Cc: Animesh Manna Cc: Anshuman Gupta Cc: Damien Le Moal Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Himanshu Madhani Cc: James Bottomley Cc: John Garry Cc: Martin K. Petersen Cc: Uma Shankar Cc: Xiang Chen Cc: Zeng Tao Signed-off-by: Andrew Morton --- include/linux/seq_file.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 386ab580b839..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ -- cgit v1.2.3 From 6309727ef27162deabd5c095c11af24970fba5a2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 8 Sep 2023 01:40:48 +0200 Subject: kthread: add kthread_stop_put Add a kthread_stop_put() helper that stops a thread and puts its task struct. Use it to replace the various instances of kthread_stop() followed by put_task_struct(). Remove the kthread_stop_put() macro in usbip that is similar but doesn't return the result of kthread_stop(). [agruenba@redhat.com: fix kerneldoc comment] Link: https://lkml.kernel.org/r/20230911111730.2565537-1-agruenba@redhat.com [akpm@linux-foundation.org: document kthread_stop_put()'s argument] Link: https://lkml.kernel.org/r/20230907234048.2499820-1-agruenba@redhat.com Signed-off-by: Andreas Gruenbacher Signed-off-by: Andrew Morton --- include/linux/kthread.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2c30ade43bc8..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,6 +86,7 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_should_stop_or_park(void); -- cgit v1.2.3 From 5e57418a2031cd5e1863efdf3d7447a16a368172 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Sep 2023 18:49:13 +0300 Subject: minmax: deduplicate __unconst_integer_typeof() It appears that compiler_types.h already have an implementation of the __unconst_integer_typeof() called __unqual_scalar_typeof(). Use it instead of the copy. Link: https://lkml.kernel.org/r/20230911154913.4176033-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Herve Codina Signed-off-by: Andrew Morton --- include/linux/minmax.h | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 83aebc244cba..69bbe987fa87 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include #include #include @@ -134,27 +135,6 @@ */ #define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) -/* - * Remove a const qualifier from integer types - * _Generic(foo, type-name: association, ..., default: association) performs a - * comparison against the foo type (not the qualified type). - * Do not use the const keyword in the type-name as it will not match the - * unqualified type of foo. - */ -#define __unconst_integer_type_cases(type) \ - unsigned type: (unsigned type)0, \ - signed type: (signed type)0 - -#define __unconst_integer_typeof(x) typeof( \ - _Generic((x), \ - char: (char)0, \ - __unconst_integer_type_cases(char), \ - __unconst_integer_type_cases(short), \ - __unconst_integer_type_cases(int), \ - __unconst_integer_type_cases(long), \ - __unconst_integer_type_cases(long long), \ - default: (x))) - /* * Do not check the array parameter using __must_be_array(). * In the following legit use-case where the "array" passed is a simple pointer, @@ -169,13 +149,13 @@ * 'int *buff' and 'int buff[N]' types. * * The array can be an array of const items. - * typeof() keeps the const qualifier. Use __unconst_integer_typeof() in order + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order * to discard the const qualifier for the __element variable. */ #define __minmax_array(op, array, len) ({ \ typeof(&(array)[0]) __array = (array); \ typeof(len) __len = (len); \ - __unconst_integer_typeof(__array[0]) __element = __array[--__len]; \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ while (__len--) \ __element = op(__element, __array[__len]); \ __element; }) -- cgit v1.2.3 From f6e9d38f8eb00ac8b52e6d15f6aa9bcecacb081b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Sep 2023 12:23:55 +0300 Subject: minmax: fix header inclusions BUILD_BUG_ON*() macros are defined in build_bug.h. Include it. Replace compiler_types.h by compiler.h, which provides the former, to have a definition of the __UNIQUE_ID(). Link: https://lkml.kernel.org/r/20230912092355.79280-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Herve Codina Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- include/linux/minmax.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 69bbe987fa87..ca69abd6151e 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,7 +2,8 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H -#include +#include +#include #include #include -- cgit v1.2.3 From a9e1a3d84e4a0ea560ed4d84c28d06dbfdffed22 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:35 +0800 Subject: crash_core: change the prototype of function parse_crashkernel() Add two parameters 'low_size' and 'high' to function parse_crashkernel(), later crashkernel=,high|low parsing will be added. Make adjustments in all call sites of parse_crashkernel() in arch. Link: https://lkml.kernel.org/r/20230914033142.676708-3-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 0c06561bf5ff..6156355ef831 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -80,7 +80,8 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void final_note(Elf_Word *buf); int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, -- cgit v1.2.3 From 70916e9c8d9f1a286c99727072b22e395097909f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:36 +0800 Subject: crash_core: change parse_crashkernel() to support crashkernel=,high|low parsing Now parse_crashkernel() is a real entry point for all kinds of crahskernel parsing on any architecture. And wrap the crahskernel=,high|low handling inside CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ifdeffery scope. Link: https://lkml.kernel.org/r/20230914033142.676708-4-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 6156355ef831..d8050a7eab01 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -79,6 +79,12 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, bool *high); -- cgit v1.2.3 From 0ab97169aa0517079b22c2e64192906caa5dc6d5 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:37 +0800 Subject: crash_core: add generic function to do reservation In architecture like x86_64, arm64 and riscv, they have vast virtual address space and usually have huge physical memory RAM. Their crashkernel reservation doesn't have to be limited under 4G RAM, but can be extended to the whole physical memory via crashkernel=,high support. Now add function reserve_crashkernel_generic() to reserve crashkernel memory if users specify any case of kernel pamameters, like crashkernel=xM[@offset] or crashkernel=,high|low. This is preparation to simplify code of crashkernel=,high support in architecutures. Link: https://lkml.kernel.org/r/20230914033142.676708-5-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d8050a7eab01..4dbd6565e0ff 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -93,6 +93,34 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -- cgit v1.2.3 From b631b95dded5e7f007a3a79cbaf82ef50c1e2cf7 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:38 +0800 Subject: crash_core: move crashk_*res definition into crash_core.c Both crashk_res and crashk_low_res are used to mark the reserved crashkernel regions in iomem_resource tree. And later the generic crashkernel resrvation will be added into crash_core.c. So move crashk_res and crashk_low_res definition into crash_core.c to avoid compiling error if CONFIG_CRASH_CORE=on while CONFIG_KEXEC_CORE is unset. Meanwhile include in if generic reservation is needed. In that case, need be added by ARCH. In asm/crash_core.h, ARCH can provide its own macro definitions to override macros in if needed. Wrap the including into CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ifdeffery scope to avoid compiling error in other ARCH-es which don't take the generic reservation way yet. Link: https://lkml.kernel.org/r/20230914033142.676708-6-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 8 ++++++++ include/linux/kexec.h | 4 ---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 4dbd6565e0ff..3c735a7e33fb 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,6 +5,14 @@ #include #include #include +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; #define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 32c78078552c..8227455192b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -22,10 +22,6 @@ #include #include -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE -- cgit v1.2.3 From c37e56cac3d62c69f093904afbc58fc428484d14 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:42 +0800 Subject: crash_core.c: remove unneeded functions So far, nobody calls functions parse_crashkernel_high() and parse_crashkernel_low(), remove both of them. Link: https://lkml.kernel.org/r/20230914033142.676708-10-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 3c735a7e33fb..3426f6eef60b 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -96,10 +96,6 @@ void final_note(Elf_Word *buf); int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, bool *high); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE -- cgit v1.2.3 From 2cb1f6e9a743af58a23cf14563b5eada1e0d3fde Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:36:00 +0200 Subject: rcu: Conditionally build CPU-hotplug teardown callbacks Among the three CPU-hotplug teardown RCU callbacks, two of them early exit if CONFIG_HOTPLUG_CPU=n, and one is left unchanged. In any case all of them have an implementation when CONFIG_HOTPLUG_CPU=n. Align instead with the common way to deal with CPU-hotplug teardown callbacks and provide a proper stub when they are not supported. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/rcutree.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 153cfc7bbffd..46875c4e9f56 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -110,9 +110,16 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -int rcutree_offline_cpu(unsigned int cpu); +void rcu_cpu_starting(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +int rcutree_offline_cpu(unsigned int cpu); +#else +#define rcutree_dead_cpu NULL +#define rcutree_dying_cpu NULL +#define rcutree_offline_cpu NULL +#endif #endif /* __LINUX_RCUTREE_H */ -- cgit v1.2.3 From 448e9f34d91d1a4799fdb06a93c2c24b34b6fd9d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:36:01 +0200 Subject: rcu: Standardize explicit CPU-hotplug calls rcu_report_dead() and rcutree_migrate_callbacks() have their headers in rcupdate.h while those are pure rcutree calls, like the other CPU-hotplug functions. Also rcu_cpu_starting() and rcu_report_dead() have different naming conventions while they mirror each other's effects. Fix the headers and propose a naming that relates both functions and aligns with the prefix of other rcutree CPU-hotplug functions. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 2 +- include/linux/rcupdate.h | 2 -- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 7 ++++++- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..d05e1e9a553c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,7 +566,7 @@ enum * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. - * 2) rcu_report_dead() reports the final quiescent states. + * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index aa351ddcbe8d..f7206b2623c9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -122,8 +122,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(void); -void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7b949292908a..d9ac7b136aea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -171,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); } #define rcutree_offline_cpu NULL #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL -static inline void rcu_cpu_starting(unsigned int cpu) { } +static inline void rcutree_report_cpu_starting(unsigned int cpu) { } #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 46875c4e9f56..254244202ea9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -110,7 +110,7 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +void rcutree_report_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); @@ -122,4 +122,9 @@ int rcutree_offline_cpu(unsigned int cpu); #define rcutree_offline_cpu NULL #endif +void rcutree_migrate_callbacks(int cpu); + +/* Called from hotplug and also arm64 early secondary boot failure */ +void rcutree_report_cpu_dead(void); + #endif /* __LINUX_RCUTREE_H */ -- cgit v1.2.3 From 5790b1fb3d672d9a1fe3881a7181dfdbe741568f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 4 Oct 2023 16:50:07 -0400 Subject: eventfs: Remove eventfs_file and just use eventfs_inode Instead of having a descriptor for every file represented in the eventfs directory, only have the directory itself represented. Change the API to send in a list of entries that represent all the files in the directory (but not other directories). The entry list contains a name and a callback function that will be used to create the files when they are accessed. struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, const struct eventfs_entry *entries, int size, void *data); is used for the top level eventfs directory, and returns an eventfs_inode that will be used by: struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, const struct eventfs_entry *entries, int size, void *data); where both of the above take an array of struct eventfs_entry entries for every file that is in the directory. The entries are defined by: typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, const struct file_operations **fops); struct eventfs_entry { const char *name; eventfs_callback callback; }; Where the name is the name of the file and the callback gets called when the file is being created. The callback passes in the name (in case the same callback is used for multiple files), a pointer to the mode, data and fops. The data will be pointing to the data that was passed in eventfs_create_dir() or eventfs_create_events_dir() but may be overridden to point to something else, as it will be used to point to the inode->i_private that is created. The information passed back from the callback is used to create the dentry/inode. If the callback fills the data and the file should be created, it must return a positive number. On zero or negative, the file is ignored. This logic may also be used as a prototype to convert entire pseudo file systems into just-in-time allocation. The "show_events_dentry" file has been updated to show the directories, and any files they have. With just the eventfs_file allocations: Before after deltas for meminfo (in kB): MemFree: -14360 MemAvailable: -14260 Buffers: 40 Cached: 24 Active: 44 Inactive: 48 Inactive(anon): 28 Active(file): 44 Inactive(file): 20 Dirty: -4 AnonPages: 28 Mapped: 4 KReclaimable: 132 Slab: 1604 SReclaimable: 132 SUnreclaim: 1472 Committed_AS: 12 Before after deltas for slabinfo: : [ * = ] ext4_inode_cache 27 [* 1184 = 31968 ] extent_status 102 [* 40 = 4080 ] tracefs_inode_cache 144 [* 656 = 94464 ] buffer_head 39 [* 104 = 4056 ] shmem_inode_cache 49 [* 800 = 39200 ] filp -53 [* 256 = -13568 ] dentry 251 [* 192 = 48192 ] lsm_file_cache 277 [* 32 = 8864 ] vm_area_struct -14 [* 184 = -2576 ] trace_event_file 1748 [* 88 = 153824 ] kmalloc-1k 35 [* 1024 = 35840 ] kmalloc-256 49 [* 256 = 12544 ] kmalloc-192 -28 [* 192 = -5376 ] kmalloc-128 -30 [* 128 = -3840 ] kmalloc-96 10581 [* 96 = 1015776 ] kmalloc-64 3056 [* 64 = 195584 ] kmalloc-32 1291 [* 32 = 41312 ] kmalloc-16 2310 [* 16 = 36960 ] kmalloc-8 9216 [* 8 = 73728 ] Free memory dropped by 14,360 kB Available memory dropped by 14,260 kB Total slab additions in size: 1,771,032 bytes With this change: Before after deltas for meminfo (in kB): MemFree: -12084 MemAvailable: -11976 Buffers: 32 Cached: 32 Active: 72 Inactive: 168 Inactive(anon): 176 Active(file): 72 Inactive(file): -8 Dirty: 24 AnonPages: 196 Mapped: 8 KReclaimable: 148 Slab: 836 SReclaimable: 148 SUnreclaim: 688 Committed_AS: 324 Before after deltas for slabinfo: : [ * = ] tracefs_inode_cache 144 [* 656 = 94464 ] shmem_inode_cache -23 [* 800 = -18400 ] filp -92 [* 256 = -23552 ] dentry 179 [* 192 = 34368 ] lsm_file_cache -3 [* 32 = -96 ] vm_area_struct -13 [* 184 = -2392 ] trace_event_file 1748 [* 88 = 153824 ] kmalloc-1k -49 [* 1024 = -50176 ] kmalloc-256 -27 [* 256 = -6912 ] kmalloc-128 1864 [* 128 = 238592 ] kmalloc-64 4685 [* 64 = 299840 ] kmalloc-32 -72 [* 32 = -2304 ] kmalloc-16 256 [* 16 = 4096 ] total = 721352 Free memory dropped by 12,084 kB Available memory dropped by 11,976 kB Total slab additions in size: 721,352 bytes That's over 2 MB in savings per instance for free and available memory, and over 1 MB in savings per instance of slab memory. Link: https://lore.kernel.org/linux-trace-kernel/20231003184059.4924468e@gandalf.local.home Link: https://lore.kernel.org/linux-trace-kernel/20231004165007.43d79161@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 2 +- include/linux/tracefs.h | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..12207dc6722d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -649,7 +649,7 @@ struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; - struct eventfs_file *ef; + struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 009072792fa3..0c39704455d9 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -23,26 +23,25 @@ struct file_operations; struct eventfs_file; -struct dentry *eventfs_create_events_dir(const char *name, - struct dentry *parent); +typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, + const struct file_operations **fops); -struct eventfs_file *eventfs_add_subsystem_dir(const char *name, - struct dentry *parent); +struct eventfs_entry { + const char *name; + eventfs_callback callback; +}; -struct eventfs_file *eventfs_add_dir(const char *name, - struct eventfs_file *ef_parent); +struct eventfs_inode; -int eventfs_add_file(const char *name, umode_t mode, - struct eventfs_file *ef_parent, void *data, - const struct file_operations *fops); +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data); -int eventfs_add_events_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data); -void eventfs_remove(struct eventfs_file *ef); - -void eventfs_remove_events_dir(struct dentry *dentry); +void eventfs_remove_dir(struct eventfs_inode *ei); struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, -- cgit v1.2.3 From d7e9a9037de27b642d5a3edef7c69e2a2b460287 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 2 Oct 2023 16:09:35 -0700 Subject: f2fs: Support Block Size == Page Size This allows f2fs to support cases where the block size = page size for both 4K and 16K block sizes. Other sizes should work as well, should the need arise. This does not currently support 4K Block size filesystems if the page size is 16K. Signed-off-by: Daniel Rosenberg Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 69 +++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..07ed69c2840d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -13,10 +13,10 @@ #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ -#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ +#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */ +#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ +#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ +#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) @@ -210,14 +210,14 @@ struct f2fs_checkpoint { unsigned char sit_nat_version_bitmap[]; } __packed; -#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ +#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */ #define CP_MIN_CHKSUM_OFFSET \ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap)) /* * For orphan inode management */ -#define F2FS_ORPHANS_PER_BLOCK 1020 +#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32)) #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) @@ -243,14 +243,31 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 /* 200 bytes for inline xattrs by default */ #define DEFAULT_INLINE_XATTR_ADDRS 50 -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ + +#define OFFSET_OF_END_OF_I_EXT 360 +#define SIZE_OF_I_NID 20 + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode number */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +/* Address Pointers in an Inode */ +#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \ + - SIZE_OF_I_NID \ + - sizeof(struct node_footer)) / sizeof(__le32)) #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) -#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +/* Address Pointers in a Direct Block */ +#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +/* Node IDs in an Indirect Block */ +#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_PAGE(page, inode) \ (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) @@ -342,14 +359,6 @@ enum { #define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode number */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - struct f2fs_node { /* can be one of three types: inode, direct, and indirect types */ union { @@ -363,7 +372,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -378,12 +387,13 @@ struct f2fs_nat_block { /* * For SIT entries * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. + * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size, + * this results in a segment size of 2MB. For 16k pages, the default segment size + * is 8MB. * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry)) /* * F2FS uses 4 bytes to represent block address. As a result, supported size of @@ -418,7 +428,7 @@ struct f2fs_sit_block { * For segment summary * * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. + * exactly one segment by default. Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -429,12 +439,12 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM 512 +#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8) #define SUMMARY_SIZE (7) /* sizeof(struct summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) -/* a summary entry for a 4KB-sized block in a segment */ +/* a summary entry for a block in a segment */ struct f2fs_summary { __le32 nid; /* parent node id */ union { @@ -518,7 +528,7 @@ struct f2fs_journal { }; } __packed; -/* 4KB-sized summary block structure */ +/* Block-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; @@ -559,11 +569,14 @@ typedef __le32 f2fs_hash_t; * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. */ -#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1)) #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ +#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) #define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ @@ -576,7 +589,7 @@ struct f2fs_dir_entry { __u8 file_type; /* file type */ } __packed; -/* 4KB-sized directory entry block */ +/* Block-sized directory entry block */ struct f2fs_dentry_block { /* validity bitmap for directory entries in each block */ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; -- cgit v1.2.3 From 1cf56299f9bc7d4b8e1e39af08f01d6380e28173 Mon Sep 17 00:00:00 2001 From: Randy Li Date: Fri, 15 Sep 2023 01:23:23 +0800 Subject: USB: dma: remove unused function prototype usb_buffer_map_sg() and usb_buffer_unmap_sg() have no definition since the beginning of v5.4. The rest are gone from 2.6.12. Signed-off-by: Randy Li Link: https://lore.kernel.org/r/20230914172336.18761-2-ayaka@soulik.info Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index a21074861f91..8c61643acd49 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1823,22 +1823,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); -#if 0 -struct urb *usb_buffer_map(struct urb *urb); -void usb_buffer_dmasync(struct urb *urb); -void usb_buffer_unmap(struct urb *urb); -#endif - -struct scatterlist; -int usb_buffer_map_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int nents); -#if 0 -void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); -#endif -void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); - /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ -- cgit v1.2.3 From 0f28ada1fbf0054557cddcdb93ad17f767105208 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Garcia Date: Wed, 6 Sep 2023 11:49:26 +0000 Subject: mcb: remove is_added flag from mcb_device struct When calling mcb_bus_add_devices(), both mcb devices and the mcb bus will attempt to attach a device to a driver because they share the same bus_type. This causes an issue when trying to cast the container of the device to mcb_device struct using to_mcb_device(), leading to a wrong cast when the mcb_bus is added. A crash occurs when freing the ida resources as the bus numbering of mcb_bus gets confused with the is_added flag on the mcb_device struct. The only reason for this cast was to keep an is_added flag on the mcb_device struct that does not seem necessary. The function device_attach() handles already bound devices and the mcb subsystem does nothing special with this is_added flag so remove it completely. Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable Signed-off-by: Jorge Sanjuan Garcia Co-developed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jose Javier Rodriguez Barbarin Link: https://lore.kernel.org/r/20230906114901.63174-2-JoseJavier.Rodriguez@duagon.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; -- cgit v1.2.3 From e6814ec3ba1994561db9b1c05a80227d30cc18fa Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 21 Jul 2023 09:06:07 +0000 Subject: perf/core: Rename perf_proc_update_handler() -> perf_event_max_sample_rate_handler(), for readability Follow the naming pattern of the other sysctl handlers in perf. Signed-off-by: Xiu Jianfeng Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230721090607.172002-1-xiujianfeng@huawei.com --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e85cd1c0eaf3..f31f962a6445 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1573,7 +1573,7 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_proc_update_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From 0cff993e08a7578e2c1df93a95fc5059f447e7ae Mon Sep 17 00:00:00 2001 From: "pangzizhen001@208suo.com" Date: Thu, 20 Jul 2023 23:45:39 +0800 Subject: locking/seqlock: Fix typo in comment s/the the /the [ mingo: Cleaned up the changelog. ] Signed-off-by: Zizhen Pang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/70293ecd5bb7a1cd370fd4d95c35f936@208suo.com --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..ea7a58258af6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -864,7 +864,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ -- cgit v1.2.3 From b83ce9cb4a465b8f9a3fa45561b721a9551f60e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 8 Sep 2023 10:27:23 +0200 Subject: dma-buf: add dma_fence_timestamp helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fence signals there is a very small race window where the timestamp isn't updated yet. sync_file solves this by busy waiting for the timestamp to appear, but on other ocassions didn't handled this correctly. Provide a dma_fence_timestamp() helper function for this and use it in all appropriate cases. Another alternative would be to grab the spinlock when that happens. v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case the accurate timestamp is needed and/or the timestamp is not based on ktime (e.g. hw timestamp) v3 chk: drop the parameter again for unified handling Signed-off-by: Yunxiang Li Signed-off-by: Christian König Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2") Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com --- include/linux/dma-fence.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..ebe78bd3d121 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From a083c755e136844a934bc9b4416cd23b5c19c617 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 8 Sep 2023 22:58:40 +0900 Subject: devres: rename the first parameter of devm_add_action(_or_reset) The first parameter of devm_add_action(_or_reset) is a device. The name 'release' is confusing because it is often used for dr_release_t in the devres context. Rename it to 'dev'. No functional change intended. Signed-off-by: Masahiro Yamada Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230908135840.2362708-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 56d93a1ffb7b..d7a72a8749ea 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -389,8 +389,8 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(release, action, data) \ - __devm_add_action(release, action, data, #action) +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) @@ -403,8 +403,8 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( return ret; } -#define devm_add_action_or_reset(release, action, data) \ - __devm_add_action_or_reset(release, action, data, #action) +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu -- cgit v1.2.3 From 5831fc1fd4a578232fea708b82de0c666ed17153 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Thu, 28 Sep 2023 16:57:22 +0800 Subject: crypto: hisilicon/qm - fix PF queue parameter issue If the queue isolation feature is enabled, the number of queues supported by the device changes. When PF is enabled using the current default number of queues, the default number of queues may be greater than the number supported by the device. As a result, the PF fails to be bound to the driver. After modification, if queue isolation feature is enabled, when the default queue parameter is greater than the number supported by the device, the number of enabled queues will be changed to the number supported by the device, so that the PF and driver can be properly bound. Fixes: 8bbecfb402f7 ("crypto: hisilicon/qm - add queue isolation support for Kunpeng930") Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 39fbfb4be944..9da4f3f1e6d6 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -144,6 +144,13 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_misc_ctl_bits { + QM_DRIVER_REMOVING = 0x0, + QM_RST_SCHED, + QM_RESETTING, + QM_MODULE_PARAM, +}; + enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, -- cgit v1.2.3 From b42ab1c61a77832040ad42ebf9adf237360e49f7 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 28 Sep 2023 17:21:47 +0800 Subject: crypto: hisilicon/qm - check function qp num before alg register When the Kunpeng accelerator executes tasks such as encryption and decryption have minimum requirements on the number of device queues. If the number of queues does not meet the requirement, the process initialization will fail. Therefore, the driver checks the number of queues on the device before registering the algorithm. If the number does not meet the requirements, the driver does not register the algorithm to crypto subsystem, the device is still added to the qm_list. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9da4f3f1e6d6..34c64a02712c 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -478,6 +478,20 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) mutex_init(&qm_list->lock); } +static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -523,8 +537,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); void hisi_qm_pm_uninit(struct hisi_qm *qm); -- cgit v1.2.3 From 8468516f9f93a41dc65158b6428a1a1039c68f20 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Mon, 2 Oct 2023 00:57:15 +0100 Subject: crypto: pkcs7 - remove md4 md5 x.509 support Remove support for md4 md5 hash and signatures in x.509 certificate parsers, pkcs7 signature parser, authenticode parser. All of these are insecure or broken, and everyone has long time ago migrated to alternative hash implementations. Also remove md2 & md3 oids which have already didn't have support. This is also likely the last user of md4 in the kernel, and thus crypto/md4.c and related tests in tcrypt & testmgr can likely be removed. Other users such as cifs smbfs ext modpost sumversions have their own internal implementation as needed. Signed-off-by: Dimitri John Ledkov Reviewed-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index f86a08ba0207..4d04fa5d1eec 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -30,9 +30,6 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ - OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */ - OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */ - OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ @@ -49,11 +46,6 @@ enum OID { OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */ - /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */ - OID_md2, /* 1.2.840.113549.2.2 */ - OID_md4, /* 1.2.840.113549.2.4 */ - OID_md5, /* 1.2.840.113549.2.5 */ - OID_mskrb5, /* 1.2.840.48018.1.2.2 */ OID_krb5, /* 1.2.840.113554.1.2.2 */ OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */ -- cgit v1.2.3 From 7523d330aac7190f738998a52df8d5aa14293280 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Sep 2023 13:40:46 +0300 Subject: device property: Clarify usage scope of some struct fwnode_handle members Most of the struct fwnode_handle members are for exclusive use with device links framework. Clarify this by adding a respective comment. Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20230904104046.1682875-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5700451b300f..2a72f55d26eb 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -41,6 +41,8 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + + /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; -- cgit v1.2.3 From 1dc05a274a7b13fd61b6c43f0136153752e6f731 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Sep 2023 18:38:19 +0300 Subject: device property: Replace custom implementation of COUNT_ARGS() Replace custom and non-portable implementation of COUNT_ARGS(). Fixes: e64b674bc9d7 ("software node: implement reference properties") Reported-by: Nick Desaulniers Closes: https://lore.kernel.org/r/ZQoILN6QCjzosCOs@google.com Signed-off-by: Andy Shevchenko Reviewed-by: Takashi Iwai Closes: https://github.com/ClangBuiltLinux/linux/issues/1935 Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230920153819.2069869-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..9f2585d705a8 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -10,6 +10,7 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include #include #include #include @@ -288,7 +289,7 @@ struct software_node_ref_args { #define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ (const struct software_node_ref_args) { \ .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .nargs = COUNT_ARGS(__VA_ARGS__), \ .args = { __VA_ARGS__ }, \ } -- cgit v1.2.3 From a56cc0a8338523f709892696cc229527617c1316 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Oct 2023 15:17:24 +0200 Subject: thermal: core: Add function to walk trips under zone lock Add a wrapper around for_each_thermal_trip(), called thermal_zone_for_each_trip(), that will invoke the former under the thermal zone lock and pass its return value to the caller. Two drivers will be modified subsequently to use this new function. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6710a4ace992..2bab72149bbf 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -286,6 +286,9 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); -- cgit v1.2.3 From 4963e34ce7b95237021575d208fa576f88697839 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Oct 2023 15:25:33 +0200 Subject: thermal: core: Drop thermal_zone_device_exec() Because thermal_zone_device_exec() has no users any more and there are no plans to use it anywhere, revert commit 9a99a996d1ec ("thermal: core: Introduce thermal_zone_device_exec()") that introduced it. No functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- include/linux/thermal.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bab72149bbf..c8600e313909 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -339,10 +339,6 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -void thermal_zone_device_exec(struct thermal_zone_device *tz, - void (*cb)(struct thermal_zone_device *, - unsigned long), - unsigned long data); struct thermal_cooling_device *thermal_cooling_device_register(const char *, void *, const struct thermal_cooling_device_ops *); -- cgit v1.2.3 From c62f5032f72a745542aa6a7e777c7819c96adfbe Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:01 +0100 Subject: comedi: comedi_8254: Use a call-back function for register access Rework the comedi_8254 module to use a call-back function for register access. This will make it easier to isolate the parts that will depend on the `CONFIG_HAS_IOPORT` macro being defined and also allows the possibility of supplying an external callback function during initialization by a variant of the `comedi_8254_init()` and `comedi_8254_mm_init()` functions, although that has not been implemented yet. The `struct comedi_8254` members have been changed to use a pointer to a callback function and a context of type `unsigned long`. The `comedi_8254_init()` and `comedi_8254_mm_init()` functions use an internal callback function and set the context to the base address of the registers (for `comedi_8254_mm_init()` that involves converting a `void __iomem *` to `unsigned long`). A minor change to `dio200_subdev_8254_offset()` in the amplc_dio200_common module has been made due to the changes in `struct comedi_8254`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-3-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8254.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index d8264417e53c..18d12321c87d 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -57,10 +57,24 @@ struct comedi_subdevice; /* counter maps zero to 0x10000 */ #define I8254_MAX_COUNT 0x10000 +struct comedi_8254; + +/** + * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access + * @i8254: pointer to struct comedi_8254 + * @dir: direction (0 = read, 1 = write) + * @reg: register number + * @val: value to write + * + * Return: Register value when reading, 0 when writing. + */ +typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val); + /** * struct comedi_8254 - private data used by this module - * @iobase: PIO base address of the registers (in/out) - * @mmio: MMIO base address of the registers (read/write) + * @iocb: I/O call-back function for register access + * @context: context for register access (e.g. a base address) * @iosize: I/O size used to access the registers (b/w/l) * @regshift: register gap shift * @osc_base: cascaded oscillator speed in ns @@ -76,8 +90,8 @@ struct comedi_subdevice; * @insn_config: driver specific (*insn_config) callback */ struct comedi_8254 { - unsigned long iobase; - void __iomem *mmio; + comedi_8254_iocb_fn *iocb; + unsigned long context; unsigned int iosize; unsigned int regshift; unsigned int osc_base; -- cgit v1.2.3 From fade5e5b0b2a2cc3855f64be6407b0bdcd837714 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:02 +0100 Subject: comedi: comedi_8254: Replace comedi_8254_init() and comedi_8254_mm_init() `comedi_8254_init()` and `comedi_8254_mm_init()` return `NULL` on failure, but the failure is not necessarily due to lack of memory. Change them to return an `ERR_PTR` value on failure and rename the functions to make it obvious the API has changed. `comedi_8254_init()` has been replaced with `comedi_8254_io_alloc()`, and `comedi_8254_mm_init()` has been replaced with `comedi_8254_mm_alloc()`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8254.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index 18d12321c87d..393ccb301028 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -136,13 +136,13 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); #endif /* _COMEDI_8254_H */ -- cgit v1.2.3 From 90d256757e0bffd7a9beafd7c2bdc40a0236f9ec Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:03 +0100 Subject: comedi: comedi_8254: Conditionally remove I/O port support The comedi_8254 module supports both port I/O and memory-mapped I/O. In a future patch, the port I/O functions (`inb()`, `outb()`, and friends) will only be declared if the `HAS_IOPORT` configuration option is enabled. Conditionally compile the parts of the module that use port I/O so they are compiled if and only if the `CONFIG_HAS_IOPORT` macro is defined, so that it can still be built if the port I/O functions have not been declared. If `CONFIG_HAS_IOPORT` is undefined, replace the GPL-exported `comedi_8254_io_alloc()` function with a dummy static inline version that just returns `ERR_PTR(-ENXIO)`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-5-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8254.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index 393ccb301028..d527f04400df 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -12,6 +12,8 @@ #define _COMEDI_8254_H #include +#include +#include struct comedi_device; struct comedi_insn; @@ -136,10 +138,21 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); +#ifdef CONFIG_HAS_IOPORT struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, unsigned int osc_base, unsigned int iosize, unsigned int regshift); +#else +static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) +{ + return ERR_PTR(-ENXIO); +} +#endif + struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, unsigned int osc_base, unsigned int iosize, -- cgit v1.2.3 From 5c57b1ccecc72738d4b9be2dfcdfb9001be76bd7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:05 +0100 Subject: comedi: comedi_8255: Rework subdevice initialization functions Comedi drivers can initialize an 8255 subdevice in I/O space by calling `subdev_8255_init()`, or in memory-mapped I/O space by calling `subdev_8255_mm_init()`, or by supplying a call-back function pointer and context to either of those functions. Change it so that a new function `subdev_8255_cb_init()` shall be called instead when supplying a callback function and context, and remove the call-back function parameter from `subdev_8255_init()` and `subdev_8255_mm_init()`. Also rename `subdev_8255_init()` to `subdev_8255_io_init()`. The parameters are changing, so might as well rename it at the same time. Also rename the `regbase` member of `struct subdev_8255_private` to `context` since this holds the context for the call-back function call. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-7-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8255.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b2a5bc6b3a49..b396fcfbf8b0 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -27,16 +27,17 @@ struct comedi_device; struct comedi_subdevice; -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase); +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase); int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), unsigned long regbase); +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long context), + unsigned long context); + unsigned long subdev_8255_regbase(struct comedi_subdevice *s); #endif -- cgit v1.2.3 From 7187a0939a1773e6a2663a02a6c456047a5e6289 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:06 +0100 Subject: comedi: comedi_8255: Conditionally remove I/O port support In a future patch, the port I/O functions (`inb()`, `outb()`, and friends will only be declared in the `HAS_IOPORT` configuration option is enabled. The comedi_8255 module supports both port I/O and memory-mapped I/O. Conditionally compile the parts of the module that use port I/O if and only if the `CONFIG_HAS_IOPORT` macro is defined so that it can still be built if the port I/O functions have not been declared. If the `CONFIG_HAS_IOPORT` macro is undefined, replace the GPL-exported `subdev_8255_io_init()` function with a dummy static inline version that just returns `-ENXIO`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-8-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8255.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b396fcfbf8b0..d24a69da389b 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -10,6 +10,8 @@ #ifndef _COMEDI_8255_H #define _COMEDI_8255_H +#include + #define I8255_SIZE 0x04 #define I8255_DATA_A_REG 0x00 @@ -27,8 +29,17 @@ struct comedi_device; struct comedi_subdevice; +#ifdef CONFIG_HAS_IOPORT int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long regbase); +#else +static inline int subdev_8255_io_init(struct comedi_device *dev, + struct comedi_subdevice *s, + unsigned long regbase) +{ + return -ENXIO; +} +#endif int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long regbase); -- cgit v1.2.3 From 77f048bcbf07f7dc961f3b2b7815038b5405ec60 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 30 Sep 2023 11:14:47 +0200 Subject: comedi: Annotate struct comedi_lrange with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). Signed-off-by: Christophe JAILLET Reviewed-by: Kees Cook Reviewed-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/5c3b7459b820e22e2ac6ce892d4aadcc119cc919.1696065263.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedidev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index 0a1150900ef3..c08416a7364b 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown; */ struct comedi_lrange { int length; - struct comedi_krange range[]; + struct comedi_krange range[] __counted_by(length); }; /** -- cgit v1.2.3 From 0fedefd4c4e33dd24f726b13b5d7c143e2b483be Mon Sep 17 00:00:00 2001 From: Valentine Sinitsyn Date: Mon, 25 Sep 2023 11:40:12 +0300 Subject: kernfs: sysfs: support custom llseek method for sysfs entries As of now, seeking in sysfs files is handled by generic_file_llseek(). There are situations where one may want to customize seeking logic: - Many sysfs entries are fixed files while generic_file_llseek() accepts past-the-end positions. Not only being useless by itself, this also means a bug in userspace code will trigger not at lseek(), but at some later point making debugging harder. - generic_file_llseek() relies on f_mapping->host to get the file size which might not be correct for all sysfs entries. See commit 636b21b50152 ("PCI: Revoke mappings like devmem") as an example. Implement llseek method to override this behavior at sysfs attribute level. The method is optional, and if it is absent, generic_file_llseek() is called to preserve backwards compatibility. Signed-off-by: Valentine Sinitsyn Link: https://lore.kernel.org/r/20230925084013.309399-1-valesini@yandex-team.ru Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 + include/linux/sysfs.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 2a36f3218b51..99aaa050ccb7 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -316,6 +316,7 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); + loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd3fe5c8c17f..b717a70219f6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -181,6 +181,8 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; -- cgit v1.2.3 From eb7581deb4c2eef77f6368e1891e123b69349bb0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Sep 2023 19:53:12 +0300 Subject: resource: Constify resource crosscheck APIs Constify APIs: _contains(), _overlaps(), _intersection(), _union(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230912165312.402422-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/ioport.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 25d768d48970..14f5cfabbbc8 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res) return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ -static inline bool resource_contains(struct resource *r1, struct resource *r2) +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; @@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) } /* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +static inline bool resource_overlaps(const struct resource *r1, const struct resource *r2) { return r1->start <= r2->end && r1->end >= r2->start; } -static inline bool -resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_intersection(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource return true; } -static inline bool -resource_union(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_union(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; -- cgit v1.2.3 From 8a76356e7db02ec7b1913db06605e70294d94672 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 27 Sep 2023 11:27:41 +0300 Subject: iio: improve doc for available_scan_mask The available_scan_mask is an array of bitmaps representing the channels which can be simultaneously enabled by the driver. In many cases, the hardware can offer more channels than what the user is interested in obtaining. In such cases, it may be preferred that only a subset of channels are enabled, and the driver reads only a subset of the channels from the hardware. Some devices can't support all channel combinations. For example, the BM1390 pressure sensor must always read the pressure data in order to acknowledge the watermark IRQ, while reading temperature can be omitted. So, the available scan masks would be 'pressure and temperature' and 'pressure only'. When IIO searches for the scan mask it asks the driver to use, it will pick the first suitable one from the 'available_scan_mask' array. Hence, ordering the masks in the array makes a difference. We should 'prefer' reading just the pressure from the hardware (as it is a cheaper operation than reading both pressure and temperature) over reading both pressure and temperature. Hence, we should set the 'only pressure' as the first scan mask in available_scan_mask array. If we set the 'pressure and temperature' as first in the array, then the 'only temperature' will never get used as 'pressure and temperature' can always serve the user's needs. Add (minimal) kerneldoc to the 'available_scan_mask' to hint the user that the ordering of masks matters. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/4e43bf0186df5c8a56b470318b4827605f9cad6c.1695727471.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 202e55b0a28b..7bfa1b9bc8a2 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -556,7 +556,9 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @available_scan_masks: [DRIVER] optional array of allowed bitmasks + * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the + * array in order of preference, the most preferred + * masks first. * @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers -- cgit v1.2.3 From b3a4dbc89d4021b3f90ff6a13537111a004f9d07 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 4 Oct 2023 20:05:31 -0400 Subject: io_uring/kbuf: Use slab for struct io_buffer objects The allocation of struct io_buffer for metadata of provided buffers is done through a custom allocator that directly gets pages and fragments them. But, slab would do just fine, as this is not a hot path (in fact, it is a deprecated feature) and, by keeping a custom allocator implementation we lose benefits like tracking, poisoning, sanitizers. Finally, the custom code is more complex and requires keeping the list of pages in struct ctx for no good reason. This patch cleans this path up and just uses slab. I microbenchmarked it by forcing the allocation of a large number of objects with the least number of io_uring commands possible (keeping nbufs=USHRT_MAX), with and without the patch. There is a slight increase in time spent in the allocation with slab, of course, but even when allocating to system resources exhaustion, which is not very realistic and happened around 1/2 billion provided buffers for me, it wasn't a significant hit in system time. Specially if we think of a real-world scenario, an application doing register/unregister of provided buffers will hit ctx->io_buffers_cache more often than actually going to slab. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20231005000531.30800-4-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e178461fa513..e4e67899b134 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -350,8 +350,6 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; - struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) struct socket *ring_sock; #endif -- cgit v1.2.3 From 2819f23ac12ce93ff79ca7a54597df9a4a1f6331 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 5 Oct 2023 09:13:48 -0400 Subject: eventfs: Use eventfs_remove_events_dir() The update to removing the eventfs_file changed the way the events top level directory was handled. Instead of returning a dentry, it now returns the eventfs_inode. In this changed, the removing of the events top level directory is not much different than removing any of the other directories. Because of this, the removal just called eventfs_remove_dir() instead of eventfs_remove_events_dir(). Although eventfs_remove_dir() does the clean up, it misses out on the dget() of the ei->dentry done in eventfs_create_events_dir(). It makes more sense to match eventfs_create_events_dir() with a specific function eventfs_remove_events_dir() and this specific function can then perform the dput() to the dentry that had the dget() when it was created. Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310051743.y9EobbUr-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- include/linux/tracefs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 0c39704455d9..13359b1a35d1 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -41,6 +41,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode const struct eventfs_entry *entries, int size, void *data); +void eventfs_remove_events_dir(struct eventfs_inode *ei); void eventfs_remove_dir(struct eventfs_inode *ei); struct dentry *tracefs_create_file(const char *name, umode_t mode, -- cgit v1.2.3 From 7e6f3b6d2c352b5fde37ce3fed83bdf6172eebd4 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 27 Sep 2023 13:22:12 -0700 Subject: PCI: Prevent xHCI driver from claiming AMD VanGogh USB3 DRD device The AMD VanGogh SoC contains a DesignWare USB3 Dual-Role Device that can be operated as either a USB Host or a USB Device, similar to on the AMD Nolan platform. be6646bfbaec ("PCI: Prevent xHCI driver from claiming AMD Nolan USB3 DRD device") added a quirk to let the dwc3 driver claim the Nolan device since it provides more specific support. Extend that quirk to include the VanGogh SoC USB3 device. Link: https://lore.kernel.org/r/20230927202212.2388216-1-vi@endrift.com Signed-off-by: Vicki Pfau [bhelgaas: include be6646bfbaec reference, add stable tag] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.19+ --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5fb3d4c393a9..3a8e24e9a93f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -579,6 +579,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 5b9ceb63c49b9934cf2ec70b3b76951927e50a24 Mon Sep 17 00:00:00 2001 From: John Sanpe Date: Fri, 15 Sep 2023 22:06:50 +0800 Subject: logic_pio: Remove logic_outb(), _outw(), outl() duplicate declarations Remove duplicate declarations of logic_out* functions. Link: https://lore.kernel.org/r/20230915140650.3562504-1-sanpeqf@gmail.com Signed-off-by: John Sanpe Signed-off-by: Bjorn Helgaas --- include/linux/logic_pio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 54945aa824b4..babf4e3c28ba 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -39,9 +39,6 @@ struct logic_pio_host_ops { #ifdef CONFIG_INDIRECT_PIO u8 logic_inb(unsigned long addr); -void logic_outb(u8 value, unsigned long addr); -void logic_outw(u16 value, unsigned long addr); -void logic_outl(u32 value, unsigned long addr); u16 logic_inw(unsigned long addr); u32 logic_inl(unsigned long addr); void logic_outb(u8 value, unsigned long addr); -- cgit v1.2.3 From 7151d87a175c6618fe81705755eb3dc4199cad4e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 27 Sep 2023 10:31:30 +0200 Subject: virtchnl: Add header dependencies The uses BIT, struct_size and ETH_ALEN macros but does not include appropriate header files that defines them. Add these dependencies so this header file can be included anywhere. Signed-off-by: Ivan Vecera Reviewed-by: Przemek Kitszel Reviewed-by: Jesse Brandeburg Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index dd71d3009771..6b3acf15be5c 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -4,6 +4,10 @@ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ +#include +#include +#include + /* Description: * This header file describes the Virtual Function (VF) - Physical Function * (PF) communication protocol used by the drivers for all devices starting -- cgit v1.2.3 From 9beebc2b5d0038a65977a7a14909598c64ce070f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 28 Sep 2023 09:24:28 +0200 Subject: can: dev: add can_state_get_by_berr_counter() to return the CAN state based on the current error counters Some CAN controllers do not have a register that contains the current CAN state, but only a register that contains the error counters. Introduce a new function can_state_get_by_berr_counter() that returns the current TX and RX state depending on the provided CAN bit error counters. Link: https://lore.kernel.org/all/20231005-at91_can-rx_offload-v2-1-9987d53600e0@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 982ba245eb41..1b92aed49363 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -- cgit v1.2.3 From 9c8c3fa3a52bc55696ccc4dfcb8a49f969b5fb0e Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 5 Oct 2023 16:21:36 +0900 Subject: bpf: Fix the comment for bpf_restore_data_end() The comment used to say: > Restore data saved by bpf_compute_data_pointers(). But bpf_compute_data_pointers() does not save the data; bpf_compute_and_save_data_end() does. Signed-off-by: Akihiko Odaki Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231005072137.29870-1-akihiko.odaki@daynix.com Signed-off-by: Martin KaFai Lau --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 27406aee2d40..ff7ecc89d3dd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { -- cgit v1.2.3 From 3fbc5c3b8522d655cf91d32c158261060fdc02fe Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:07 +0200 Subject: PM: domains: Introduce dev_pm_domain_set_performance_state() The generic PM domain is currently the only PM domain variant that supports performance scaling. To allow performance scaling to be supported through a common interface, let's add an optional callback ->set_performance_state(), in the struct dev_pm_domain. Moreover, let's add a function, dev_pm_domain_set_performance_state(), that may be called by consumers to request a new performance state for a device through its PM domain. Note that, in most cases it's preferred that a consumer use the OPP library to request a new performance state for its device. Although, this requires some additional changes to be supported, which are being implemented from subsequent changes. Signed-off-by: Ulf Hansson Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- include/linux/pm.h | 2 ++ include/linux/pm_domain.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1400c37b29c7..4c9f571609c8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -719,6 +719,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * @set_performance_state: Called to request a new performance state. * * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions instead of @@ -731,6 +732,7 @@ struct dev_pm_domain { int (*activate)(struct device *dev); void (*sync)(struct device *dev); void (*dismiss)(struct device *dev); + int (*set_performance_state)(struct device *dev, unsigned int state); }; /* diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..bda2964a0a56 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -430,6 +430,7 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, void dev_pm_domain_detach(struct device *dev, bool power_off); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { @@ -452,6 +453,11 @@ static inline int dev_pm_domain_start(struct device *dev) } static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} +static inline int dev_pm_domain_set_performance_state(struct device *dev, + unsigned int state) +{ + return 0; +} #endif #endif /* _LINUX_PM_DOMAIN_H */ -- cgit v1.2.3 From 248a38d5cc3f3505e6cfbbc0514435c9f1ba00af Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:09 +0200 Subject: OPP: Add dev_pm_opp_add_dynamic() to allow more flexibility The dev_pm_opp_add() API is limited to add dynamic OPPs with a frequency and a voltage level. To enable more flexibility, let's add a new API, dev_pm_opp_add_dynamic() that's takes a struct dev_pm_opp_data* instead of a list of in-parameters. Signed-off-by: Ulf Hansson Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 91f87d7e807c..a8ee93ba41d8 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -92,6 +92,16 @@ struct dev_pm_opp_config { struct device ***virt_devs; }; +/** + * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @freq: The clock rate in Hz for the OPP. + * @u_volt: The voltage in uV for the OPP. + */ +struct dev_pm_opp_data { + unsigned long freq; + unsigned long u_volt; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -152,8 +162,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, void dev_pm_opp_put(struct dev_pm_opp *opp); -int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt); +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); + void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); @@ -322,8 +332,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt) +static inline int +dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp) { return -EOPNOTSUPP; } @@ -519,6 +529,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta /* OPP Configuration helpers */ +static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + struct dev_pm_opp_data data = { + .freq = freq, + .u_volt = u_volt, + }; + + return dev_pm_opp_add_dynamic(dev, &data); +} + /* Regulators helpers */ static inline int dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) -- cgit v1.2.3 From a0242c81bb759ef03184be8eddcc7d5bdf36cc16 Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Thu, 7 Sep 2023 11:30:31 +0530 Subject: OPP: Add dev_pm_opp_find_level_floor() Add dev_pm_opp_find_level_floor(), as is done for frequency and bandwidth. Signed-off-by: Krishna chaitanya chundru [ Viresh: Updated commit log and rearranged code ] Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9ad168f4cbf1..ccd97bcef269 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -156,6 +156,9 @@ struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level); + struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -320,6 +323,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { -- cgit v1.2.3 From 3166383da081461244918aeed7ad028ef11b17cc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:10 +0200 Subject: OPP: Extend dev_pm_opp_data with a level Let's extend the dev_pm_opp_data with a level variable, to allow users to specify a corresponding level (performance state) for a dynamically added OPP. Signed-off-by: Ulf Hansson Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a8ee93ba41d8..9ad168f4cbf1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -94,10 +94,12 @@ struct dev_pm_opp_config { /** * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @level: The performance level for the OPP. * @freq: The clock rate in Hz for the OPP. * @u_volt: The voltage in uV for the OPP. */ struct dev_pm_opp_data { + unsigned int level; unsigned long freq; unsigned long u_volt; }; -- cgit v1.2.3 From 869b6ea1609f655a43251bf41757aa44e5350a8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Oct 2023 15:32:01 +0200 Subject: quota: Fix slow quotaoff Eric has reported that commit dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") heavily increases runtime of generic/270 xfstest for ext4 in nojournal mode. The reason for this is that ext4 in nojournal mode leaves dquots dirty until the last dqput() and thus the cleanup done in quota_release_workfn() has to write them all. Due to the way quota_release_workfn() is written this results in synchronize_srcu() call for each dirty dquot which makes the dquot cleanup when turning quotas off extremely slow. To be able to avoid synchronize_srcu() for each dirty dquot we need to rework how we track dquots to be cleaned up. Instead of keeping the last dquot reference while it is on releasing_dquots list, we drop it right away and mark the dquot with new DQ_RELEASING_B bit instead. This way we can we can remove dquot from releasing_dquots list when new reference to it is acquired and thus there's no need to call synchronize_srcu() each time we drop dq_list_lock. References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64 Reported-by: Eric Whitney Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- include/linux/quota.h | 4 +++- include/linux/quotaops.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } -- cgit v1.2.3 From 5a0b11a180a9b82b4437a4be1cf73530053f139b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 6 Oct 2023 09:57:02 +0000 Subject: iommu/amd: Remove iommu_v2 module AMD GPU driver which was the only in-kernel user of iommu_v2 module removed dependency on iommu_v2 module. Also we are working on adding SVA support in AMD IOMMU driver. Device drivers are expected to use common SVA framework to enable device PASID/PRI features. Removing iommu_v2 module and then adding SVA simplifies the development. Hence remove iommu_v2 module. Cc: Alex Deucher Cc: Joerg Roedel Cc: Felix Kuehling Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Tested-by: Alex Deucher Link: https://lore.kernel.org/r/20231006095706.5694-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 94 ----------------------------------------------- 1 file changed, 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..35ca00b09384 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,84 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. - */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - /** * amd_iommu_device_info() - Get information about IOMMUv2 support of a * PCI device @@ -137,22 +59,6 @@ struct amd_iommu_device_info { extern int amd_iommu_device_info(struct pci_dev *pdev, struct amd_iommu_device_info *info); -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } -- cgit v1.2.3 From 37b282fa04dd7b9bfa7c290fe07ef1730444debb Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 6 Oct 2023 09:57:04 +0000 Subject: iommu/amd: Remove amd_iommu_device_info() No one is using this function. Hence remove it. Also move PCI device feature detection flags to amd_iommu_types.h as its only used inside AMD IOMMU driver. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Tested-by: Alex Deucher Link: https://lore.kernel.org/r/20231006095706.5694-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 35ca00b09384..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,32 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } -- cgit v1.2.3 From 1609626c32c4538439f6333d0b6c912af9f13b77 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:44:54 +0100 Subject: firmware: arm_ffa: Update the FF-A command list with v1.1 additions Arm Firmware Framework for A-profile(FFA) v1.1 introduces notifications and indirect messaging based upon notifications support and extends some of the memory interfaces. Let us add all the newly supported FF-A function IDs in the spec. Also update to the error values and associated handling. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-1-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cc060da51bec..2ea1717a0825 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -20,6 +20,7 @@ #define FFA_ERROR FFA_SMC_32(0x60) #define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_FN64_SUCCESS FFA_SMC_64(0x61) #define FFA_INTERRUPT FFA_SMC_32(0x62) #define FFA_VERSION FFA_SMC_32(0x63) #define FFA_FEATURES FFA_SMC_32(0x64) @@ -54,6 +55,23 @@ #define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) #define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) #define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) +#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D) +#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E) +#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F) +#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80) +#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81) +#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82) +#define FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83) +#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83) +#define FFA_RX_ACQUIRE FFA_SMC_32(0x84) +#define FFA_SPM_ID_GET FFA_SMC_32(0x85) +#define FFA_MSG_SEND2 FFA_SMC_32(0x86) +#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87) +#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87) +#define FFA_MEM_PERM_GET FFA_SMC_32(0x88) +#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) +#define FFA_MEM_PERM_SET FFA_SMC_32(0x89) +#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -76,6 +94,7 @@ #define FFA_RET_DENIED (-6) #define FFA_RET_RETRY (-7) #define FFA_RET_ABORTED (-8) +#define FFA_RET_NO_DATA (-9) /* FFA version encoding */ #define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) @@ -86,6 +105,7 @@ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) +#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) /** * FF-A specification mentions explicitly about '4K pages'. This should -- cgit v1.2.3 From fe2ddb6b42358ad25a6aed30512fb284522335f3 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:44:57 +0100 Subject: firmware: arm_ffa: Implement the FFA_RUN interface FFA_RUN is used by a scheduler to allocate CPU cycles to a target endpoint execution context specified in the target information parameter. If the endpoint execution context is in the waiting/blocked state, it transitions to the running state. Expose the ability to call FFA_RUN in order to give any partition in the system cpu cycles to perform IMPDEF functionality. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-4-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 2ea1717a0825..12fd134bf670 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -387,10 +387,15 @@ struct ffa_mem_ops { int (*memory_lend)(struct ffa_mem_ops_args *args); }; +struct ffa_cpu_ops { + int (*run)(struct ffa_device *dev, u16 vcpu); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; + const struct ffa_cpu_ops *cpu_ops; }; #endif /* _LINUX_ARM_FFA_H */ -- cgit v1.2.3 From 0184450b8b1e7734110472616c4758839e1aff96 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:02 +0100 Subject: firmware: arm_ffa: Add schedule receiver callback mechanism Enable client drivers to register a callback function that will be called when one or more notifications are pending for a target partition as part of schedule receiver interrupt handling. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-9-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 12fd134bf670..f9cf6114ef82 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -391,11 +391,19 @@ struct ffa_cpu_ops { int (*run)(struct ffa_device *dev, u16 vcpu); }; +typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +struct ffa_notifier_ops { + int (*sched_recv_cb_register)(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data); + int (*sched_recv_cb_unregister)(struct ffa_device *dev); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; const struct ffa_cpu_ops *cpu_ops; + const struct ffa_notifier_ops *notifier_ops; }; #endif /* _LINUX_ARM_FFA_H */ -- cgit v1.2.3 From e0573444edbf4ee7e3c191d3d08a4ccbd26628be Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:03 +0100 Subject: firmware: arm_ffa: Add interfaces to request notification callbacks Add interface to the FFA driver to allow for client drivers to request and relinquish a notification as well as provide a callback for the notification. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-10-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index f9cf6114ef82..fb6f388a3737 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -392,10 +392,15 @@ struct ffa_cpu_ops { }; typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); + struct ffa_notifier_ops { int (*sched_recv_cb_register)(struct ffa_device *dev, ffa_sched_recv_cb cb, void *cb_data); int (*sched_recv_cb_unregister)(struct ffa_device *dev); + int (*notify_request)(struct ffa_device *dev, bool per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id); + int (*notify_relinquish)(struct ffa_device *dev, int notify_id); }; struct ffa_ops { -- cgit v1.2.3 From e5adb3b20e39dbf18651322a9bc24eb55050188f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:04 +0100 Subject: firmware: arm_ffa: Add interface to send a notification to a given partition The framework provides an interface to the sender endpoint to specify the notification to signal to the receiver endpoint. A sender signals a notification by requesting its partition manager to set the corresponding bit in the notifications bitmap of the receiver. Expose the ability to send a notification to another partition. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-11-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index fb6f388a3737..f6df81f14b6d 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -401,6 +401,8 @@ struct ffa_notifier_ops { int (*notify_request)(struct ffa_device *dev, bool per_vcpu, ffa_notifier_cb cb, void *cb_data, int notify_id); int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, + u16 vcpu); }; struct ffa_ops { -- cgit v1.2.3 From c9b21ef0d0a87695d7bfeee9a04b89760b49ccf5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:06 +0100 Subject: firmware: arm_ffa: Simplify the computation of transmit and fragment length The computation of endpoint memory access descriptor's composite memory region descriptor offset is using COMPOSITE_CONSTITUENTS_OFFSET which is unnecessary complicated. Composite memory region descriptor always follow the endpoint memory access descriptor array and hence it is computed accordingly. COMPOSITE_CONSTITUENTS_OFFSET is useless and wrong for any input other than endpoint memory access descriptor count. Let us drop the usage of COMPOSITE_CONSTITUENTS_OFFSET to simplify the computation of total transmit and fragment length in the memory transactions. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-13-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index f6df81f14b6d..748d0a83a4bc 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -356,8 +356,6 @@ struct ffa_mem_region { (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) -#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ - (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) struct ffa_mem_ops_args { bool use_txbuf; -- cgit v1.2.3 From bc5bc309db45a7ab218ce8259ba9bc7659be61ca Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 5 Oct 2023 08:41:23 +0000 Subject: bpf: Inherit system settings for CPU security mitigations Currently, there exists a system-wide setting related to CPU security mitigations, denoted as 'mitigations='. When set to 'mitigations=off', it deactivates all optional CPU mitigations. Therefore, if we implement a system-wide 'mitigations=off' setting, it should inherently bypass Spectre v1 and Spectre v4 in the BPF subsystem. Please note that there is also a more specific 'nospectre_v1' setting on x86 and ppc architectures, though it is not currently exported. For the time being, let's disregard more fine-grained options. This idea emerged during our discussion about potential Spectre v1 attacks with Luis [0]. [0] https://lore.kernel.org/bpf/b4fc15f7-b204-767e-ebb9-fdb4233961fb@iogearbox.net Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Acked-by: Song Liu Acked-by: KP Singh Cc: Luis Gerhorst Link: https://lore.kernel.org/bpf/20231005084123.1338-1-laoar.shao@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a82efd34b741..61bde4520f5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return perfmon_capable() || cpu_mitigations_off(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return perfmon_capable() || cpu_mitigations_off(); } int bpf_map_new_fd(struct bpf_map *map, int flags); -- cgit v1.2.3 From e7a1b32e43b194bbf930281ae7f5149c420cd122 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Thu, 5 Oct 2023 15:41:20 +0200 Subject: cpufreq: Rebuild sched-domains when removing cpufreq driver The Energy Aware Scheduler (EAS) relies on the schedutil governor. When moving to/from the schedutil governor, sched domains must be rebuilt to allow re-evaluating the enablement conditions of EAS. This is done through sched_cpufreq_governor_change(). Having a cpufreq governor assumes a cpufreq driver is running. Inserting/removing a cpufreq driver should trigger a re-evaluation of EAS enablement conditions, avoiding to see EAS enabled when removing a running cpufreq driver. Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(), allowing to check EAS's enablement condition whenever schedutil governor is initialized/exited from. Move relevant code up in schedutil.c to avoid a split and conditional function declaration. Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd(). Signed-off-by: Pierre Gondois Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 71d186d6933a..1c5ca92a0555 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov); -#else -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) { } -#endif - extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale -- cgit v1.2.3 From 51a23b1be92046f0cc52384d30cf060700f1a54e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 2 Aug 2023 17:28:56 +0800 Subject: acpi,mm: fix typo sibiling -> sibling First found this typo as reviewing memory tier code. Fix it by sed like: $ sed -i 's/sibiling/sibling/g' $(git grep -l sibiling) so the acpi one will be corrected as well. Link: https://lkml.kernel.org/r/20230802092856.819328-1-lizhijian@cn.fujitsu.com Signed-off-by: Li Zhijian Cc: Aneesh Kumar K.V Cc: Huang, Ying Cc: Len Brown Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- include/linux/memory-tiers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 437441cdf78f..4fa178b50784 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -22,7 +22,7 @@ struct memory_tier; struct memory_dev_type { /* list of memory types that are part of same tier as this type */ - struct list_head tier_sibiling; + struct list_head tier_sibling; /* abstract distance for this specific memory type */ int adistance; /* Nodes of same abstract distance */ -- cgit v1.2.3 From 55d2a0bd5eadaade850efa9d3a7ffbb0aeb67198 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 18 Sep 2023 14:31:42 +0800 Subject: mm: add statistics for PUD level pagetable Recently, we found that cross-die access to pagetable pages on ARM64 machines can cause performance fluctuations in our business. Currently, there are no PMU events available to track this situation on our ARM64 machines, so accurate pagetable accounting can help to analyze this issue, but now the PUD level pagetable accounting is missed. So introduce pagetable_pud_ctor/dtor() to help to get accurate PUD pagetable accounting, as well as converting the architectures which use generic PUD pagetable allocation to add corresponding PUD pagetable accounting. Moreover this patch will mark the PUD level pagetable with PG_table flag, which will help to do sanity validation in unpoison_memory(). On my testing machine, I can see more pagetables statistics after the patch with page-types tool: Before patch: flags page-count MB symbolic-flags long-symbolic-flags 0x0000000004000000 27326 106 __________________________g_________________ pgtable After patch: 0x0000000004000000 27541 107 __________________________g_________________ pgtable Link: https://lkml.kernel.org/r/876c71c03a7e69c17722a690e3225a4f7b172fb2.1695017383.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: Mike Rapoport (IBM) Acked-by: Vishal Moola (Oracle) Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 31dc25d3f6b5..126b54b45442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3049,6 +3049,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) return ptl; } +static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); +} + +static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + extern void __init pagecache_init(void); extern void free_initmem(void); -- cgit v1.2.3 From 7ced098fcfe596feab3cea4f40128b0119c7bf1a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Sep 2023 22:18:32 +0200 Subject: mm: document mmu_notifier_invalidate_range_start_nonblock() Document what mmu_notifier_invalidate_range_start_nonblock() is for. Also add a __must_check annotation to signal that callers must bail out if a notifier vetoes the operation. Link: https://lkml.kernel.org/r/20230918201832.265108-1-jannh@google.com Signed-off-by: Jann Horn Reviewed-by: Jason Gunthorpe Reviewed-by: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 6e3c857606f1..f349e08a9dfe 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -459,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) lock_map_release(&__mmu_notifier_invalidate_range_start_map); } -static inline int +/* + * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it + * can return an error if a notifier can't proceed without blocking, in which + * case you're not allowed to modify PTEs in the specified range. + * + * This is mainly intended for OOM handling. + */ +static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; -- cgit v1.2.3 From 24e41bf8a6b424c76c5902fb999e9eca61bdf83d Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 28 Aug 2023 17:08:57 +0200 Subject: mm: add a NO_INHERIT flag to the PR_SET_MDWE prctl This extends the current PR_SET_MDWE prctl arg with a bit to indicate that the process doesn't want MDWE protection to propagate to children. To implement this no-inherit mode, the tag in current->mm->flags must be absent from MMF_INIT_MASK. This means that the encoding for "MDWE but without inherit" is different in the prctl than in the mm flags. This leads to a bit of bit-mangling in the prctl implementation. Link: https://lkml.kernel.org/r/20230828150858.393570-6-revest@chromium.org Signed-off-by: Florent Revest Reviewed-by: Kees Cook Reviewed-by: Catalin Marinas Cc: Alexey Izbyshev Cc: Anshuman Khandual Cc: Ayush Jain Cc: David Hildenbrand Cc: Greg Thelen Cc: Joey Gouly Cc: KP Singh Cc: Mark Brown Cc: Michal Hocko Cc: Peter Xu Cc: Ryan Roberts Cc: Szabolcs Nagy Cc: Topi Miettinen Signed-off-by: Andrew Morton --- include/linux/sched/coredump.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..1b37fa8fc723 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm) MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) #define MMF_VM_MERGE_ANY 29 +#define MMF_HAS_MDWE_NO_INHERIT 30 + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} + #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 7e845ecb2fbfa1bf800e703df29ee2e06592c2a0 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Wed, 30 Aug 2023 19:15:28 +0800 Subject: PCI: Add pci_is_vga() helper The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define VGA Base Class and Sub-Classes: 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) Add a pci_is_vga() helper to return true if a device is in either category. These VGA devices use the hardwired legacy VGA resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and aliases), so they require special handling if more than one is present in the system. Link: https://lore.kernel.org/r/20230830111532.444535-2-sui.jingfeng@linux.dev Signed-off-by: Sui Jingfeng [bhelgaas: commit log, drop !pdev test] Signed-off-by: Bjorn Helgaas Cc: "Maciej W. Rozycki" --- include/linux/pci.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c7c2c3c6c65..7bab234391cb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -713,6 +713,30 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +/** + * pci_is_vga - check if the PCI device is a VGA device + * + * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define + * VGA Base Class and Sub-Classes: + * + * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible + * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) + * + * Return true if the PCI device is a VGA device and uses the legacy VGA + * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and + * aliases). + */ +static inline bool pci_is_vga(struct pci_dev *pdev) +{ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + return true; + + if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA) + return true; + + return false; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else -- cgit v1.2.3 From 3abbd0699b678fc48e0100704338cff9180fe4bb Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Thu, 5 Oct 2023 20:29:15 +0200 Subject: net: phy: broadcom: add support for BCM5221 phy This patch adds the BCM5221 PHY support by reusing brcm_fet_*() callbacks and adding quirks for BCM5221 when needed. Cc: Jim Reinhart Cc: James Autry Cc: Matthew Maron Signed-off-by: Giulio Benetti Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231005182915.153815-1-giulio.benetti@benettiengineering.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index c55810a43541..1394ba302367 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 +#define PHY_ID_BCM5221 0x004061e0 #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 @@ -331,6 +332,15 @@ #define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) +/* BCM5221 Registers */ +#define BCM5221_AEGSR 0x1C +#define BCM5221_AEGSR_MDIX_STATUS BIT(13) +#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12) +#define BCM5221_AEGSR_MDIX_DIS BIT(11) + +#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2) +#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ -- cgit v1.2.3 From 77bbfe607b1d306c88bf96fed00c030f6bf462f1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 4 Oct 2023 07:42:23 +0800 Subject: firmware: arm_scmi: Add support for clock parents SCMI v3.2 spec introduces CLOCK_POSSIBLE_PARENTS_GET, CLOCK_PARENT_SET and CLOCK_PARENT_GET. Add support for these to enable clock parents and use them in the clock driver. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20231004-scmi-clock-v3-v5-1-1b8a1435673e@nxp.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 27bfa5a65b45..f2f05fb42d28 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -58,6 +58,8 @@ struct scmi_clock_info { u64 step_size; } range; }; + int num_parents; + u32 *parents; }; enum scmi_power_scale { @@ -83,6 +85,8 @@ struct scmi_protocol_handle; * @state_get: get the status of the specified clock * @config_oem_get: get the value of an OEM specific clock config * @config_oem_set: set the value of an OEM specific clock config + * @parent_get: get the parent id of a clk + * @parent_set: set the parent of a clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -104,6 +108,8 @@ struct scmi_clk_proto_ops { bool atomic); int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u8 oem_type, u32 oem_val, bool atomic); + int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); + int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); }; struct scmi_perf_domain_info { -- cgit v1.2.3 From 76cf932c95b9e7c07b065b5c71e56957e2826ae2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:07 +0100 Subject: KVM: arm64: FFA: Remove access of endpoint memory access descriptor array FF-A v1.1 removes the fixed location of endpoint memory access descriptor array within the memory transaction descriptor structure. In preparation to remove the ep_mem_access member from the ffa_mem_region structure, provide the accessor to fetch the offset and use the same in FF-A proxy implementation. The accessor take the FF-A version as the argument from which the memory access descriptor format can be determined. v1.0 uses the old format while v1.1 onwards use the new format specified in the v1.1 specification. Cc: Oliver Upton Cc: Will Deacon Cc: Quentin Perret Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-14-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 748d0a83a4bc..2444d596b703 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -357,6 +357,12 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +static inline u32 +ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) +{ + return COMPOSITE_OFFSET(0); +} + struct ffa_mem_ops_args { bool use_txbuf; u32 nattrs; -- cgit v1.2.3 From 113580530ee7dc61e668b641d657920734533b9f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:09 +0100 Subject: firmware: arm_ffa: Update memory descriptor to support v1.1 format Update memory transaction descriptor structure to accommodate couple of new entries in v1.1 which were previously marked reserved and MBZ(must be zero). It also removes the flexible array member ep_mem_access in the memory transaction descriptor structure as it need not be at fixed offset. Also update ffa_mem_desc_offset() accessor to handle both old and new formats of memory transaction descriptors. The updated ffa_mem_region structure aligns with new format in v1.1 and hence the driver/user must take care not to use members beyond and including ep_mem_offset when using the old format. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-16-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 2444d596b703..1abedb5b2e48 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -6,6 +6,7 @@ #ifndef _LINUX_ARM_FFA_H #define _LINUX_ARM_FFA_H +#include #include #include #include @@ -298,8 +299,8 @@ struct ffa_mem_region { #define FFA_MEM_NON_SHAREABLE (0) #define FFA_MEM_OUTER_SHAREABLE (2) #define FFA_MEM_INNER_SHAREABLE (3) - u8 attributes; - u8 reserved_0; + /* Memory region attributes, upper byte MBZ pre v1.1 */ + u16 attributes; /* * Clear memory region contents after unmapping it from the sender and * before mapping it for any receiver. @@ -337,30 +338,40 @@ struct ffa_mem_region { * memory region. */ u64 tag; - u32 reserved_1; + /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */ + u32 ep_mem_size; /* * The number of `ffa_mem_region_attributes` entries included in this * transaction. */ u32 ep_count; /* - * An array of endpoint memory access descriptors. - * Each one specifies a memory region offset, an endpoint and the - * attributes with which this memory region should be mapped in that - * endpoint's page table. + * 16-byte aligned offset from the base address of this descriptor + * to the first element of the endpoint memory access descriptor array + * Valid only from v1.1 */ - struct ffa_mem_region_attributes ep_mem_access[]; + u32 ep_mem_offset; + /* MBZ, valid only from v1.1 */ + u32 reserved[3]; }; -#define COMPOSITE_OFFSET(x) \ - (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - return COMPOSITE_OFFSET(0); + u32 offset = count * sizeof(struct ffa_mem_region_attributes); + /* + * Earlier to v1.1, the endpoint memory descriptor array started at + * offset 32(i.e. offset of ep_mem_offset in the current structure) + */ + if (ffa_version <= FFA_VERSION_1_0) + offset += offsetof(struct ffa_mem_region, ep_mem_offset); + else + offset += sizeof(struct ffa_mem_region); + + return offset; } struct ffa_mem_ops_args { -- cgit v1.2.3 From 3e7807d5a7d770c59837026e9967fe99ad043174 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 Oct 2023 22:55:32 -0400 Subject: fscrypt: rename fscrypt_info => fscrypt_inode_info We are going to track per-extent information, so it'll be necessary to distinguish between inode infos and extent infos. Rename fscrypt_info to fscrypt_inode_info, adjusting any lines that now exceed 80 characters. Signed-off-by: Josef Bacik [ebiggers: rebased onto fscrypt tree, renamed fscrypt_get_info(), adjusted two comments, and fixed some lines over 80 characters] Link: https://lore.kernel.org/r/20231005025757.33521-1-ebiggers@kernel.org Reviewed-by: Neal Gompa Signed-off-by: Eric Biggers --- include/linux/fs.h | 4 ++-- include/linux/fscrypt.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..a3df96736473 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -67,7 +67,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_info; +struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -738,7 +738,7 @@ struct inode { #endif #ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_info *i_crypt_info; + struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b559e6f77707..12f9e455d569 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -31,7 +31,7 @@ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; -struct fscrypt_info; +struct fscrypt_inode_info; struct fs_parameter; struct seq_file; @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). @@ -404,7 +405,8 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { return NULL; } @@ -882,7 +884,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return fscrypt_get_info(inode) != NULL; + return fscrypt_get_inode_info(inode) != NULL; } /** -- cgit v1.2.3 From 4a530cb932af31b0c919a109bc107dd186653381 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:50:53 -0700 Subject: hwmon: Annotate struct gsc_hwmon_platform_data with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct gsc_hwmon_platform_data. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Tim Harvey Reviewed-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/20230922175053.work.564-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/platform_data/gsc_hwmon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index f2781aa7eff8..70e8a6bec0f6 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data { unsigned int resolution; unsigned int vreference; unsigned int fan_base; - struct gsc_hwmon_channel channels[]; + struct gsc_hwmon_channel channels[] __counted_by(nchannels); }; #endif -- cgit v1.2.3 From a48e1f656b3c9b8192b6ca6fc92ef4daa30535fb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:21 -0700 Subject: KVM: Annotate struct kvm_irq_routing_table with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct kvm_irq_routing_table. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Reviewed-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/20230922175121.work.660-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb6c6109fdca..4944136efaa2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -664,7 +664,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif -- cgit v1.2.3 From 313ebe47d75558511aa1237b6e35c663b5c0ec6f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:09 +0200 Subject: string.h: add array-wrappers for (v)memdup_user() Currently, user array duplications are sometimes done without an overflow check. Sometimes the checks are done manually; sometimes the array size is calculated with array_size() and sometimes by calculating n * size directly in code. Introduce wrappers for arrays for memdup_user() and vmemdup_user() to provide a standardized and safe way for duplicating user arrays. This is both for new code as well as replacing usage of (v)memdup_user() in existing code that uses, e.g., n * size to calculate array sizes. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-3-pstanner@redhat.com --- include/linux/string.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..debf4ef1098f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -5,7 +5,9 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include /* for ERR_PTR() */ #include /* for E2BIG */ +#include /* for check_mul_overflow() */ #include #include @@ -14,6 +16,44 @@ extern void *memdup_user(const void __user *, size_t); extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); +/** + * memdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result is physically + * contiguous, to be freed by kfree(). + */ +static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return memdup_user(src, nbytes); +} + +/** + * vmemdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result may be not + * physically contiguous. Use kvfree() to free. + */ +static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return vmemdup_user(src, nbytes); +} + /* * Include machine specific inline routines */ -- cgit v1.2.3 From 6d0d419914286a1b255530812a38d992c6c4e608 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:06 +0100 Subject: iov_iter, net: Move csum_and_copy_to/from_iter() to net/ Move csum_and_copy_to/from_iter() to net code now that the iteration framework can be #included. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-10-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 25 +++++++++++++++++++++++++ include/linux/uio.h | 18 ------------------ 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..d0656cc11c16 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,6 +3679,31 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +static inline __wsum csum_and_memcpy(void *to, const void *from, size_t len, + __wsum sum, size_t off) +{ + __wsum next = csum_partial_copy_nocheck(from, to, len); + return csum_block_add(sum, next, off); +} + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); + +static __always_inline __must_check +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, + __wsum *csum, struct iov_iter *i) +{ + size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { diff --git a/include/linux/uio.h b/include/linux/uio.h index 65d9143f83c8..0a5426c97e02 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -338,24 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); -- cgit v1.2.3 From dc32bff195b45e8571c442954beee259e9500dac Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:07 +0100 Subject: iov_iter, net: Fold in csum_and_memcpy() Fold csum_and_memcpy() in to its callers. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-11-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0656cc11c16..c81ef5d76953 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,13 +3679,6 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } -static inline __wsum csum_and_memcpy(void *to, const void *from, size_t len, - __wsum sum, size_t off) -{ - __wsum next = csum_partial_copy_nocheck(from, to, len); - return csum_block_add(sum, next, off); -} - struct csum_state { __wsum csum; size_t off; -- cgit v1.2.3 From 7c6f353e8a73cbb8919a20e0912021a9f9d24170 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:08 +0100 Subject: iov_iter, net: Merge csum_and_copy_from_iter{,_full}() together Move csum_and_copy_from_iter_full() out of line and then merge csum_and_copy_from_iter() into its only caller. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-12-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c81ef5d76953..be402f55f6d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,23 +3679,8 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) -- cgit v1.2.3 From b5f0e20f444cd150121e0ce912ebd3f2dabd12bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:09 +0100 Subject: iov_iter, net: Move hash_and_copy_to_iter() to net/ Move hash_and_copy_to_iter() to be with its only caller in networking code. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-13-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 0a5426c97e02..b6214cbf2a43 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -338,9 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i); - struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); -- cgit v1.2.3 From 2b76129c5ae710423cfb55806803341af6a403a7 Mon Sep 17 00:00:00 2001 From: David Meriin Date: Mon, 24 Jul 2023 23:30:44 +0300 Subject: accel/habanalabs: move cpucp interface to linux/habanalabs The CPUCP interface is moved to a shared folder outside of accel as a pre-requisite to upstream the NIC drivers that will also include this file. Signed-off-by: David Meriin Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/linux/habanalabs/cpucp_if.h | 1407 +++++++++++++++++++++++++++++++++ include/linux/habanalabs/hl_boot_if.h | 790 ++++++++++++++++++ 2 files changed, 2197 insertions(+) create mode 100644 include/linux/habanalabs/cpucp_if.h create mode 100644 include/linux/habanalabs/hl_boot_if.h (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h new file mode 100644 index 000000000000..4cdedb603ecb --- /dev/null +++ b/include/linux/habanalabs/cpucp_if.h @@ -0,0 +1,1407 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020-2022 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include +#include + +#include "hl_boot_if.h" + +#define NUM_HBM_PSEUDO_CH 2 +#define NUM_HBM_CH_PER_DEV 8 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 + +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + +/* + * info of the pkt queue pointers in the first async occurrence + */ +struct cpucp_pkt_sync_err { + __le32 pi; + __le32 ci; +}; + +struct hl_eq_hbm_ecc_data { + /* SERR counter */ + __le32 sec_cnt; + /* DERR counter */ + __le32 dec_cnt; + /* Supplemental Information according to the mask bits */ + __le32 hbm_ecc_info; + /* Address in hbm where the ecc happened */ + __le32 first_addr; + /* SERR continuous address counter */ + __le32 sec_cont_cnt; + __le32 pad; +}; + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 is_critical; + __le16 block_id; + __u8 pad[4]; +}; + +enum hl_sm_sei_cause { + SM_SEI_SO_OVERFLOW, + SM_SEI_LBW_4B_UNALIGNED, + SM_SEI_AXI_RESPONSE_ERR +}; + +struct hl_eq_sm_sei_data { + __le32 sei_log; + /* enum hl_sm_sei_cause */ + __u8 sei_cause; + __u8 pad[3]; +}; + +enum hl_fw_alive_severity { + FW_ALIVE_SEVERITY_MINOR, + FW_ALIVE_SEVERITY_CRITICAL +}; + +struct hl_eq_fw_alive { + __le64 uptime_seconds; + __le32 process_id; + __le32 thread_id; + /* enum hl_fw_alive_severity */ + __u8 severity; + __u8 pad[7]; +}; + +struct hl_eq_intr_cause { + __le64 intr_cause_data; +}; + +struct hl_eq_pcie_drain_ind_data { + struct hl_eq_intr_cause intr_cause; + __le64 drain_wr_addr_lbw; + __le64 drain_rd_addr_lbw; + __le64 drain_wr_addr_hbw; + __le64 drain_rd_addr_hbw; +}; + +struct hl_eq_razwi_lbw_info_regs { + __le32 rr_aw_razwi_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_reg; + __le32 rr_ar_razwi_id_reg; +}; + +struct hl_eq_razwi_hbw_info_regs { + __le32 rr_aw_razwi_hi_reg; + __le32 rr_aw_razwi_lo_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_hi_reg; + __le32 rr_ar_razwi_lo_reg; + __le32 rr_ar_razwi_id_reg; +}; + +/* razwi_happened masks */ +#define RAZWI_HAPPENED_HBW 0x1 +#define RAZWI_HAPPENED_LBW 0x2 +#define RAZWI_HAPPENED_AW 0x4 +#define RAZWI_HAPPENED_AR 0x8 + +struct hl_eq_razwi_info { + __le32 razwi_happened_mask; + union { + struct hl_eq_razwi_lbw_info_regs lbw; + struct hl_eq_razwi_hbw_info_regs hbw; + }; + __le32 pad; +}; + +struct hl_eq_razwi_with_intr_cause { + struct hl_eq_razwi_info razwi_info; + struct hl_eq_intr_cause intr_cause; +}; + +#define HBM_CA_ERR_CMD_LIFO_LEN 8 +#define HBM_RD_ERR_DATA_LIFO_LEN 8 +#define HBM_WR_PAR_CMD_LIFO_LEN 11 + +enum hl_hbm_sei_cause { + /* Command/address parity error event is split into 2 events due to + * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix + * for even HBM CK_t cycles + */ + HBM_SEI_CMD_PARITY_EVEN, + HBM_SEI_CMD_PARITY_ODD, + /* Read errors can be reflected as a combination of SERR/DERR/parity + * errors. Therefore, we define one event for all read error types. + * LKD will perform further proccessing. + */ + HBM_SEI_READ_ERR, + HBM_SEI_WRITE_DATA_PARITY_ERR, + HBM_SEI_CATTRIP, + HBM_SEI_MEM_BIST_FAIL, + HBM_SEI_DFI, + HBM_SEI_INV_TEMP_READ_OUT, + HBM_SEI_BIST_FAIL, +}; + +/* Masks for parsing hl_hbm_sei_headr fields */ +#define HBM_ECC_SERR_CNTR_MASK 0xFF +#define HBM_ECC_DERR_CNTR_MASK 0xFF00 +#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 + +/* HBM index and MC index are known by the event_id */ +struct hl_hbm_sei_header { + union { + /* relevant only in case of HBM read error */ + struct { + __u8 ecc_serr_cnt; + __u8 ecc_derr_cnt; + __u8 read_par_cnt; + __u8 reserved; + }; + /* All other cases */ + __le32 cnt; + }; + __u8 sei_cause; /* enum hl_hbm_sei_cause */ + __u8 mc_channel; /* range: 0-3 */ + __u8 mc_pseudo_channel; /* range: 0-7 */ + __u8 is_critical; +}; + +#define HBM_RD_ADDR_SID_SHIFT 0 +#define HBM_RD_ADDR_SID_MASK 0x1 +#define HBM_RD_ADDR_BG_SHIFT 1 +#define HBM_RD_ADDR_BG_MASK 0x6 +#define HBM_RD_ADDR_BA_SHIFT 3 +#define HBM_RD_ADDR_BA_MASK 0x18 +#define HBM_RD_ADDR_COL_SHIFT 5 +#define HBM_RD_ADDR_COL_MASK 0x7E0 +#define HBM_RD_ADDR_ROW_SHIFT 11 +#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 + +struct hbm_rd_addr { + union { + /* bit fields are only for FW use */ + struct { + u32 dbg_rd_err_addr_sid:1; + u32 dbg_rd_err_addr_bg:2; + u32 dbg_rd_err_addr_ba:2; + u32 dbg_rd_err_addr_col:6; + u32 dbg_rd_err_addr_row:15; + u32 reserved:6; + }; + __le32 rd_addr_val; + }; +}; + +#define HBM_RD_ERR_BEAT_SHIFT 2 +/* dbg_rd_err_misc fields: */ +/* Read parity is calculated per DW on every beat */ +#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 +#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 +#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 +#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 +/* ECC is calculated per PC on every beat */ +#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 +#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 +#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 +#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 + +struct hl_eq_hbm_sei_read_err_intr_info { + /* DFI_RD_ERR_REP_ADDR */ + struct hbm_rd_addr dbg_rd_err_addr; + /* DFI_RD_ERR_REP_ERR */ + union { + struct { + /* bit fields are only for FW use */ + u32 dbg_rd_err_par:8; + u32 dbg_rd_err_par_data:8; + u32 dbg_rd_err_serr:4; + u32 dbg_rd_err_derr:4; + u32 reserved:8; + }; + __le32 dbg_rd_err_misc; + }; + /* DFI_RD_ERR_REP_DM */ + __le32 dbg_rd_err_dm; + /* DFI_RD_ERR_REP_SYNDROME */ + __le32 dbg_rd_err_syndrome; + /* DFI_RD_ERR_REP_DATA */ + __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; +}; + +struct hl_eq_hbm_sei_ca_par_intr_info { + /* 14 LSBs */ + __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; + /* 18 LSBs */ + __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; +}; + +#define WR_PAR_LAST_CMD_COL_SHIFT 0 +#define WR_PAR_LAST_CMD_COL_MASK 0x3F +#define WR_PAR_LAST_CMD_BG_SHIFT 6 +#define WR_PAR_LAST_CMD_BG_MASK 0xC0 +#define WR_PAR_LAST_CMD_BA_SHIFT 8 +#define WR_PAR_LAST_CMD_BA_MASK 0x300 +#define WR_PAR_LAST_CMD_SID_SHIFT 10 +#define WR_PAR_LAST_CMD_SID_MASK 0x400 + +/* Row address isn't latched */ +struct hbm_sei_wr_cmd_address { + /* DFI_DERR_LAST_CMD */ + union { + struct { + /* bit fields are only for FW use */ + u32 col:6; + u32 bg:2; + u32 ba:2; + u32 sid:1; + u32 reserved:21; + }; + __le32 dbg_wr_cmd_addr; + }; +}; + +struct hl_eq_hbm_sei_wr_par_intr_info { + /* entry 0: WR command address from the 1st cycle prior to the error + * entry 1: WR command address from the 2nd cycle prior to the error + * and so on... + */ + struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; + /* derr[0:1] - 1st HBM cycle DERR output + * derr[2:3] - 2nd HBM cycle DERR output + */ + __u8 dbg_derr; + /* extend to reach 8B */ + __u8 pad[3]; +}; + +/* + * this struct represents the following sei causes: + * command parity, ECC double error, ECC single error, dfi error, cattrip, + * temperature read-out, read parity error and write parity error. + * some only use the header while some have extra data. + */ +struct hl_eq_hbm_sei_data { + struct hl_hbm_sei_header hdr; + union { + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; + struct hl_eq_hbm_sei_read_err_intr_info read_err_info; + struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; + }; +}; + +/* Engine/farm arc interrupt type */ +enum hl_engine_arc_interrupt_type { + /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ + ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 +}; + +/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ +struct hl_engine_arc_dccm_queue_full_irq { + /* Queue index value which caused DCCM QUEUE FULL */ + __le32 queue_index; + __le32 pad; +}; + +/* Data structure specifies details of QM/FARM ARC interrupt */ +struct hl_eq_engine_arc_intr_data { + /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ + __le32 engine_id; + __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ + /* More info related to the interrupt e.g. queue index + * incase of DCCM_QUEUE_FULL interrupt. + */ + __le64 payload; + __le64 pad[5]; +}; + +#define ADDR_DEC_ADDRESS_COUNT_MAX 4 + +/* Data structure specifies details of ADDR_DEC interrupt */ +struct hl_eq_addr_dec_intr_data { + struct hl_eq_intr_cause intr_cause; + __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; + __u8 addr_cnt; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + __le64 data_placeholder; + struct hl_eq_ecc_data ecc_data; + struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */ + struct hl_eq_sm_sei_data sm_sei_data; + struct cpucp_pkt_sync_err pkt_sync_err; + struct hl_eq_fw_alive fw_alive; + struct hl_eq_intr_cause intr_cause; + struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; + struct hl_eq_razwi_info razwi_info; + struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; + struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ + struct hl_eq_engine_arc_intr_data arc_data; + struct hl_eq_addr_dec_intr_data addr_dec; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 + +#define EQ_CTL_INDEX_SHIFT 0 +#define EQ_CTL_INDEX_MASK 0x0000FFFF + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, + PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, + PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CPU CP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also include + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually send after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_NIC_INFO_GET - + * Fetch information from the device regarding the NIC. the host's driver + * passes the max size it allows the CpuCP to write to the structure, to + * prevent data corruption in case of mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_PCIE_THROUGHPUT_GET - + * Get throughput of PCIe. + * The packet's arguments specify the transaction direction (TX/RX). + * The window measurement is 10[msec], and the return value is in KB/sec. + * + * CPUCP_PACKET_PCIE_REPLAY_CNT_GET + * Replay count measures number of "replay" events, which is basicly + * number of retries done by PCIe. + * + * CPUCP_PACKET_TOTAL_ENERGY_GET - + * Total Energy is measurement of energy from the time FW Linux + * is loaded. It is calculated by multiplying the average power + * by time (passed from armcp start). The units are in MilliJouls. + * + * CPUCP_PACKET_PLL_INFO_GET - + * Fetch frequencies of PLL from the required PLL IP. + * The packet's arguments specify the device PLL type + * Pll type is the PLL from device pll_index enum. + * The result is composed of 4 outputs, each is 16-bit + * frequency in MHz. + * + * CPUCP_PACKET_POWER_GET - + * Fetch the present power consumption of the device (Current * Voltage). + * + * CPUCP_PACKET_NIC_PFC_SET - + * Enable/Disable the NIC PFC feature. The packet's arguments specify the + * NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_FAULT_GET - + * Fetch the current indication for local/remote faults from the NIC MAC. + * The result is 32-bit value of the relevant register. + * + * CPUCP_PACKET_NIC_LPBK_SET - + * Enable/Disable the MAC loopback feature. The packet's arguments specify + * the NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_MAC_INIT - + * Configure the NIC MAC channels. The packet's arguments specify the + * NIC port and the speed. + * + * CPUCP_PACKET_MSI_INFO_SET - + * set the index number for each supported msi type going from + * host to device + * + * CPUCP_PACKET_NIC_XPCS91_REGS_GET - + * Fetch the un/correctable counters values from the NIC MAC. + * + * CPUCP_PACKET_NIC_STAT_REGS_GET - + * Fetch various NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_CLR - + * Clear the various NIC MAC counters in the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - + * Fetch all NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_IS_IDLE_CHECK - + * Check if the device is IDLE in regard to the DMA/compute engines + * and QMANs. The f/w will return a bitmask where each bit represents + * a different engine or QMAN according to enum cpucp_idle_mask. + * The bit will be 1 if the engine is NOT idle. + * + * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - + * Fetch all HBM replaced-rows and prending to be replaced rows data. + * + * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - + * Fetch status of HBM rows pending replacement and need a reboot to + * be replaced. + * + * CPUCP_PACKET_POWER_SET - + * Resets power history of device to 0 + * + * CPUCP_PACKET_ENGINE_CORE_ASID_SET - + * Packet to perform engine core ASID configuration + * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestaion data that is collected during various stages of the + * boot sequence. the attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * public key and certificate also provided as part of the FW response. + * + * CPUCP_PACKET_MONITOR_DUMP_GET - + * Get monitors registers dump from the CpuCP kernel. + * The CPU will put the registers dump in the a buffer allocated by the driver + * which address is passed via the CpuCp packet. In addition, the host's driver + * passes the max size it allows the CpuCP to write to the structure, to prevent + * data corruption in case of mismatched driver/FW versions. + * Obsolete. + * + * CPUCP_PACKET_GENERIC_PASSTHROUGH - + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. + * + * CPUCP_PACKET_REGISTER_INTERRUPTS - + * Packet to register interrupts indicating LKD is ready to receive events from FW. + * + * CPUCP_PACKET_SOFT_RESET - + * Packet to perform soft-reset. + * + * CPUCP_PACKET_INTS_REGISTER - + * Packet to inform FW that queues have been established and LKD is ready to receive + * EQ events. + */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_PACKET_NIC_INFO_GET, /* internal */ + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ + CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS, /* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ + CPUCP_PACKET_NIC_PFC_SET, /* internal */ + CPUCP_PACKET_NIC_FAULT_GET, /* internal */ + CPUCP_PACKET_NIC_LPBK_SET, /* internal */ + CPUCP_PACKET_NIC_MAC_CFG, /* internal */ + CPUCP_PACKET_MSI_INFO_SET, /* internal */ + CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ + CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ + CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ + CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ + CPUCP_PACKET_POWER_SET, /* internal */ + CPUCP_PACKET_RESERVED, /* not used */ + CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ + CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ + CPUCP_PACKET_RESERVED3, /* not used */ + CPUCP_PACKET_RESERVED4, /* not used */ + CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_RESERVED11, /* not used */ + CPUCP_PACKET_RESERVED12, /* internal */ + CPUCP_PACKET_RESERVED13, /* internal */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull +#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 +#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull +#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 +#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull + +#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull + +#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull + +/* heartbeat status bits */ +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + /* + * In legacy implemetations, i2c_len was not present, + * was unused and just added as pad. + * So if i2c_len is 0, it is treated as legacy + * and r/w 1 Byte, else if i2c_len is specified, + * its treated as new multibyte r/w support. + */ + __u8 i2c_len; + }; + + struct {/* For PLL info fetch */ + __le16 pll_type; + /* TODO pll_reg is kept temporary before removal */ + __le16 pll_reg; + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data/NIC info */ + __le32 data_max_size; + + /* + * For any general status bitmask. Shall be used whenever the + * result cannot be used to hold general purpose data. + */ + __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; + }; + + union { + /* For NIC requests */ + __le32 port_index; + + /* For Generic packet sub index */ + __le32 pkt_subidx; + }; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[]; +}; + +struct cpucp_nic_status_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +struct cpucp_array_data_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +enum cpucp_led_index { + CPUCP_LED0_INDEX = 0, + CPUCP_LED1_INDEX, + CPUCP_LED2_INDEX, + CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX +}; + +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support first generation platforms. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. + */ +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_min = 4, + cpucp_temp_min_hyst, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_lowest = 21, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23, + cpucp_temp_warn = 24, + cpucp_temp_max_crit = 25, + cpucp_temp_max_warn = 26, +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_lowest = 6, + cpucp_in_highest = 7, + cpucp_in_reset_history, + cpucp_in_intr_alarm_a, + cpucp_in_intr_alarm_b, +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_lowest = 6, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_reg_attributes { + cpucp_pll_nr_reg, + cpucp_pll_nf_reg, + cpucp_pll_od_reg, + cpucp_pll_div_factor_reg, + cpucp_pll_div_sel_reg +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_type_attributes { + cpucp_pll_cpu, + cpucp_pll_pci, +}; + +/* + * cpucp_power_type aligns with hwmon_power_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_power_type { + CPUCP_POWER_INPUT = 8, + CPUCP_POWER_INPUT_HIGHEST = 9, + CPUCP_POWER_RESET_INPUT_HISTORY = 11 +}; + +/* + * MSI type enumeration table for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). + * Changing the order of entries or removing entries is not allowed. + */ +enum cpucp_msi_type { + CPUCP_EVENT_QUEUE_MSI_TYPE, + CPUCP_NIC_PORT1_MSI_TYPE, + CPUCP_NIC_PORT3_MSI_TYPE, + CPUCP_NIC_PORT5_MSI_TYPE, + CPUCP_NIC_PORT7_MSI_TYPE, + CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_NUM_OF_MSI_TYPES +}; + +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_PLL = 9, + HBM_PLL = 10, + MSS_PLL = 11, + DDR_PLL = 12, + VID_PLL = 13, + BANK_PLL = 14, + MMU_PLL = 15, + IC_PLL = 16, + MC_PLL = 17, + EMMC_PLL = 18, + D2D_PLL = 19, + CS_PLL = 20, + C2C_PLL = 21, + NCH_PLL = 22, + C2M_PLL = 23, + PLL_MAX +}; + +enum rl_index { + TPC_RL = 0, + MME_RL, + EDMA_RL, +}; + +enum pvt_index { + PVT_SW, + PVT_SE, + PVT_NW, + PVT_NE +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define CPUCP_MAX_SENSORS 128 +#define CPUCP_MAX_NICS 128 +#define CPUCP_LANES_PER_NIC 4 +#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 +#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) +#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) +#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) +#define CPUCP_HBM_ROW_REPLACE_MAX 32 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * struct cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card. + * @cpucp_card_type_pmc: PCI Mezzanine Card. + */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 +#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 + +#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 +#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 + +#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 +#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 + +/** + * struct cpucp_security_info - Security information. + * @config: configuration bit field + * @keys_num: number of stored keys + * @revoked_keys: revoked keys bit field + * @min_svn: minimal security version + */ +struct cpucp_security_info { + __u8 config; + __u8 keys_num; + __u8 revoked_keys; + __u8 min_svn; +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance + * (0 = functional, 1 = binned) + * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance + * (0 = functional, 1 = binned), maximum 1 per dcore + * @sram_binning: Categorize SRAM functionality + * (0 = fully functional, 1 = lower-half is not functional, + * 2 = upper-half is not functional) + * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. + * @mme_binning_mask: MME binning mask, + * bits [0:6] <==> dcore0 mme fma + * bits [7:13] <==> dcore1 mme fma + * bits [14:20] <==> dcore0 mme ima + * bits [21:27] <==> dcore1 mme ima + * For each group, if the 6th bit is set then first 5 bits + * represent the col's idx [0-31], otherwise these bits are + * ignored, and col idx 32 is binned. 7th bit is don't care. + * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance + * (0 = functional 1 = binned) + * @memory_repair_flag: eFuse flag indicating memory repair + * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance + * (0 = functional 1 = binned) + * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance + * (0 = functional 1 = binned) + * @interposer_version: Interposer version programmed in eFuse + * @substrate_version: Substrate version programmed in eFuse + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. + * @fw_os_version: Firmware OS Version + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le32 infineon_second_stage_version; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; + __le64 tpc_binning_mask; + __le64 decoder_binning_mask; + __u8 sram_binning; + __u8 dram_binning_mask; + __u8 memory_repair_flag; + __u8 edma_binning_mask; + __u8 xbar_binning_mask; + __u8 interposer_version; + __u8 substrate_version; + __u8 reserved2; + struct cpucp_security_info sec_info; + __le32 fw_hbm_region_size; + __u8 pll_map[PLL_MAP_LEN]; + __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; +}; + +struct cpucp_mac_addr { + __u8 mac_addr[ETH_ALEN]; +}; + +enum cpucp_serdes_type { + TYPE_1_SERDES_TYPE, + TYPE_2_SERDES_TYPE, + HLS1_SERDES_TYPE, + HLS1H_SERDES_TYPE, + HLS2_SERDES_TYPE, + HLS2_TYPE_1_SERDES_TYPE, + MAX_NUM_SERDES_TYPE, /* number of types */ + UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ +}; + +struct cpucp_nic_info { + struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; + __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; + __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; + __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le16 serdes_type; /* enum cpucp_serdes_type */ + __le16 tx_swap_map[CPUCP_MAX_NICS]; + __u8 reserved[6]; +}; + +#define PAGE_DISCARD_MAX 64 + +struct page_discard_info { + __u8 num_entries; + __u8 reserved[7]; + __le32 mmu_page_idx[PAGE_DISCARD_MAX]; +}; + +/* + * struct frac_val - fracture value represented by "integer.frac". + * @integer: the integer part of the fracture value; + * @frac: the fracture part of the fracture value. + */ +struct frac_val { + union { + struct { + __le16 integer; + __le16 frac; + }; + __le32 val; + }; +}; + +/* + * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". + * @integer: the integer part of the SER value; + * @exp: the exponent part of the SER value. + */ +struct ser_val { + __le16 integer; + __le16 exp; +}; + +/* + * struct cpucp_nic_status - describes the status of a NIC port. + * @port: NIC port index. + * @bad_format_cnt: e.g. CRC. + * @responder_out_of_sequence_psn_cnt: e.g NAK. + * @high_ber_reinit_cnt: link reinit due to high BER. + * @correctable_err_cnt: e.g. bit-flip. + * @uncorrectable_err_cnt: e.g. MAC errors. + * @retraining_cnt: re-training counter. + * @up: is port up. + * @pcs_link: has PCS link. + * @phy_ready: is PHY ready. + * @auto_neg: is Autoneg enabled. + * @timeout_retransmission_cnt: timeout retransmission events. + * @high_ber_cnt: high ber events. + * @pre_fec_ser: pre FEC SER value. + * @post_fec_ser: post FEC SER value. + * @throughput: measured throughput. + * @latency: measured latency. + */ +struct cpucp_nic_status { + __le32 port; + __le32 bad_format_cnt; + __le32 responder_out_of_sequence_psn_cnt; + __le32 high_ber_reinit; + __le32 correctable_err_cnt; + __le32 uncorrectable_err_cnt; + __le32 retraining_cnt; + __u8 up; + __u8 pcs_link; + __u8 phy_ready; + __u8 auto_neg; + __le32 timeout_retransmission_cnt; + __le32 high_ber_cnt; + struct ser_val pre_fec_ser; + struct ser_val post_fec_ser; + struct frac_val bandwidth; + struct frac_val lat; +}; + +enum cpucp_hbm_row_replace_cause { + REPLACE_CAUSE_DOUBLE_ECC_ERR, + REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, +}; + +struct cpucp_hbm_row_info { + __u8 hbm_idx; + __u8 pc; + __u8 sid; + __u8 bank_idx; + __le16 row_addr; + __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ + __u8 pad; +}; + +struct cpucp_hbm_row_replaced_rows_info { + __le16 num_replaced_rows; + __u8 pad[6]; + struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; +}; + +enum cpu_reset_status { + CPU_RST_STATUS_NA = 0, + CPU_RST_STATUS_SOFT_RST_DONE = 1, +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +#define DCORE_MON_REGS_SZ 512 +/* + * struct dcore_monitor_regs_data - DCORE monitor regs data. + * the structure follows sync manager block layout. Obsolete. + * @mon_pay_addrl: array of payload address low bits. + * @mon_pay_addrh: array of payload address high bits. + * @mon_pay_data: array of payload data. + * @mon_arm: array of monitor arm. + * @mon_status: array of monitor status. + */ +struct dcore_monitor_regs_data { + __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; + __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; + __le32 mon_pay_data[DCORE_MON_REGS_SZ]; + __le32 mon_arm[DCORE_MON_REGS_SZ]; + __le32 mon_status[DCORE_MON_REGS_SZ]; +}; + +/* contains SM data for each SYNC_MNGR (Obsolete) */ +struct cpucp_monitor_dump { + struct dcore_monitor_regs_data sync_mngr_w_s; + struct dcore_monitor_regs_data sync_mngr_e_s; + struct dcore_monitor_regs_data sync_mngr_w_n; + struct dcore_monitor_regs_data sync_mngr_e_n; +}; + +/* + * The Type of the generic request (and other input arguments) will be fetched from user by reading + * from "pkt_subidx" field in struct cpucp_packet. + * + * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + */ +enum hl_passthrough_type { + HL_PASSTHROUGH_VERSIONS, +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h new file mode 100644 index 000000000000..7de8a5786a36 --- /dev/null +++ b/include/linux/habanalabs/hl_boot_if.h @@ -0,0 +1,790 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HL_BOOT_IF_H +#define HL_BOOT_IF_H + +#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ +#define HL_POWER9_HOST_MAGIC 0x1DA30009 + +#define BOOT_FIT_SRAM_OFFSET 0x200000 + +#define VERSION_MAX_LEN 128 + +enum cpu_boot_err { + CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, + CPU_BOOT_ERR_FIT_CORRUPTED = 1, + CPU_BOOT_ERR_TS_INIT_FAIL = 2, + CPU_BOOT_ERR_DRAM_SKIPPED = 3, + CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, + CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, + CPU_BOOT_ERR_NIC_FW_FAIL = 6, + CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, + CPU_BOOT_ERR_SECURITY_FAIL = 8, + CPU_BOOT_ERR_EFUSE_FAIL = 9, + CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, + CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, + CPU_BOOT_ERR_PLL_FAIL = 12, + CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, + CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, + CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, + CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23, + CPU_BOOT_ERR_ENABLED = 31, + CPU_BOOT_ERR_SCND_EN = 63, + CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * Mask for fatal failures + * This mask contains all possible fatal failures, and a dynamic code + * will clear the non-relevant ones. + */ +#define CPU_BOOT_ERR_FATAL_MASK \ + ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ + (1 << CPU_BOOT_ERR_PLL_FAIL) | \ + (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ + (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ + (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ + (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ + (1 << CPU_BOOT_ERR_EEPROM_FAIL)) + +/* + * CPU error bits in BOOT_ERROR registers + * + * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. + * DRAM is not reliable to use. + * + * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the + * image provided by the host has failed. + * + * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. + * Boot continues as usual, but keep in + * mind this is a warning. + * + * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. + * Skipping DRAM initialization has been + * requested (e.g. strap, command, etc.) + * and FW skipped the DRAM initialization. + * Host can initialize the DRAM. + * + * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. + * Meaning the BMC data might not be + * available until reset. + * + * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. + * BMC has not provided the NIC data yet. + * Once provided this bit will be cleared. + * + * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. + * The NIC FW loading and initialization + * failed. This means NICs are not usable. + * + * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been + * started, but is not ready yet - chip + * cannot be accessed. + * + * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. + * The tasks are security init (root of + * trust), boot authentication (chain of + * trust), data packets authentication. + * + * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. + * The PCI device ID might be wrong. + * + * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. + * It mean that ppboot checksum + * verification for the preboot primary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. + * It mean that ppboot checksum + * verification for the preboot secondary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one + * of the PLLs remains in REF_CLK + * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * + * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during + * the execution of ppboot or preboot. + * for example: stack overflow. + * + * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning + * malfunctioning components might still be + * in use. + * + * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. + * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature + * sensor. + * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * + * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm + * memories. Boot disabled until reset. + * + * CPU_BOOT_ERR0_ENABLED Error registers enabled. + * This is a main indication that the + * running FW populates the error + * registers. Meaning the error bits are + * not garbage, but actual error statuses. + */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) +#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) +#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) +#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) +#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) +#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) +#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) +#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) +#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) +#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) + +enum cpu_boot_dev_sts { + CPU_BOOT_DEV_STS_SECURITY_EN = 0, + CPU_BOOT_DEV_STS_DEBUG_EN = 1, + CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, + CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, + CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, + CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, + CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, + CPU_BOOT_DEV_STS_RL_EN = 7, + CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, + CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, + CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, + CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, + CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, + CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, + CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, + CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, + CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, + CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, + CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, + CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, + CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, + CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, + CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, + CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, + CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, + CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, + CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_ENABLED = 31, + CPU_BOOT_DEV_STS_SCND_EN = 63, + CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers + * + * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. + * This is an indication for security + * enabled in FW, which means that + * all conditions for security are met: + * device is indicated as security enabled, + * registers are protected, and device + * uses keys for image verification. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. + * Enabled when JTAG or DEBUG is enabled + * in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. + * Watchdog is enabled in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. + * DRAM initialization has been done in FW. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. + * If set, it means that during boot, + * FW waited for BMC data. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. + * FW initialized E2E credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. + * FW initialized HBM credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. + * FW initialized rate limiter. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. + * FW initialized SRAM scrambler. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. + * FW initialized DRAM scrambler. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. + * FW has the hard reset procedure + * implemented. This means that FW will + * perform hard reset procedure on + * receiving the halt-machine event. + * Initialized in: preboot, u-boot, linux + * + * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available + * for use. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. + * FW handles HBM ECC indications. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd + * is set to the PI counter. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * + * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and + * any access to them is done via the FW. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from + * previleged entity. FW sets this status + * bit for host. If this bit is set then + * GIC can not be accessed from host. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running + * index for each new event sent to host. + * This is used as a method in host to + * identify that the waiting event in + * queue is actually a new event which + * was not served before. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to + * prevent IRQs overriding each other. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN + * NIC STAT and XPCS91 access is restricted + * and is done via FW only. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN + * NIC STAT get all is supported. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN + * F/W checks if the device is idle by reading defined set + * of registers. It returns a bitmask of all the engines, + * where a bit is set if the engine is not idle. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MAP_HWMON_EN + * If set, means f/w supports proprietary + * HWMON enum mapping to cpucp enums. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. + * This is a main indication that the + * running FW populates the device status + * register. Meaning the device status + * bits are not garbage, but actual + * statuses. + * Initialized in: preboot + * + */ +#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) +#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) +#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) +#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) +#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) +#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) +#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) +#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) +#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) +#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) +#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) +#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) +#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) +#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) +#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) +#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) +#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) + +enum cpu_boot_status { + CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + /* U-Boot console prompt activated, commands are not processed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + /* Finished NICs init, reported after DRAM and NICs */ + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + /* Last boot loader progress status, ready to receive commands */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, + /* Internal Boot finished, ready for boot-fit */ + CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, + /* Internal Security has been initialized, device can be accessed */ + CPU_BOOT_STATUS_SECURITY_READY = 17, +}; + +enum kmd_msg { + KMD_MSG_NA = 0, + KMD_MSG_GOTO_WFE, + KMD_MSG_FIT_RDY, + KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, + KMD_MSG_LAST +}; + +enum cpu_msg_status { + CPU_MSG_CLR = 0, + CPU_MSG_OK, + CPU_MSG_ERR, +}; + +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + __le32 cpu_pq_base_addr_low; + __le32 cpu_pq_base_addr_high; + __le32 cpu_pq_length; + __le32 cpu_pq_init_status; + __le32 cpu_eq_base_addr_low; + __le32 cpu_eq_base_addr_high; + __le32 cpu_eq_length; + __le32 cpu_eq_ci; + __le32 cpu_cq_base_addr_low; + __le32 cpu_cq_base_addr_high; + __le32 cpu_cq_length; + __le32 cpu_pf_pq_pi; + __le32 cpu_boot_dev_sts0; + __le32 cpu_boot_dev_sts1; + __le32 cpu_boot_err0; + __le32 cpu_boot_err1; + __le32 cpu_boot_status; + __le32 fw_upd_sts; + __le32 fw_upd_cmd; + __le32 fw_upd_pending_sts; + __le32 fuse_ver_offset; + __le32 preboot_ver_offset; + __le32 uboot_ver_offset; + __le32 hw_state; + __le32 kmd_msg_to_cpu; + __le32 cpu_cmd_status_to_host; + __le32 gic_host_pi_upd_irq; + __le32 gic_tpc_qm_irq_ctrl; + __le32 gic_mme_qm_irq_ctrl; + __le32 gic_dma_qm_irq_ctrl; + __le32 gic_nic_qm_irq_ctrl; + __le32 gic_dma_core_irq_ctrl; + __le32 gic_host_halt_irq; + __le32 gic_host_ints_irq; + __le32 gic_host_soft_rst_irq; + __le32 gic_rot_qm_irq_ctrl; + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ +}; + +/* TODO: remove the desc magic after the code is updated to use message */ +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC 0x4843444D +#define HL_COMMS_DESC_VER 3 + +/* HCMv - Habana Communications Message + header version */ +#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 +#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 +#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF + +#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) +#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC +#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) +#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) +#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) + +#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 + +#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ + (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ + HL_COMMS_MSG_MAGIC_VALUE) + +#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ + (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) + +#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ + (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ + HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) + +enum comms_msg_type { + HL_COMMS_DESC_TYPE = 0, + HL_COMMS_RESET_CAUSE_TYPE = 1, + HL_COMMS_FW_CFG_SKIP_TYPE = 2, + HL_COMMS_BINNING_CONF_TYPE = 3, +}; + +/* + * Binning information shared between LKD and FW + * @tpc_mask_l - TPC binning information lower 64 bit + * @dec_mask - Decoder binning information + * @dram_mask - DRAM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @rot_mask - Rotator binning information + * @xbar_mask - xBAR binning information + * @reserved - reserved field for future binning info w/o ABI change + * @tpc_mask_h - TPC binning information upper 64 bit + * @nic_mask - NIC binning information + */ +struct lkd_fw_binning_info { + __le64 tpc_mask_l; + __le32 dec_mask; + __le32 dram_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 rot_mask; + __le32 xbar_mask; + __le32 reserved0; + __le64 tpc_mask_h; + __le64 nic_mask; + __le32 reserved1[8]; +}; + +/* TODO: remove this struct after the code is updated to use message */ +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the descriptor w/o header */ + __le16 size; /* size of the descriptor w/o header */ + __u8 version; /* descriptor version */ + __u8 reserved[5]; /* pad to 64 bit */ +}; + +/* this is the comms message header - meta data */ +struct comms_msg_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the message w/o header */ + __le16 size; /* size of the message w/o header */ + __u8 version; /* message payload version */ + __u8 type; /* message type */ + __u8 reserved[4]; /* pad to 64 bit */ +}; + +enum lkd_fw_ascii_msg_lvls { + LKD_FW_ASCII_MSG_ERR = 0, + LKD_FW_ASCII_MSG_WRN = 1, + LKD_FW_ASCII_MSG_INF = 2, + LKD_FW_ASCII_MSG_DBG = 3, +}; + +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ + +struct lkd_fw_ascii_msg { + __u8 valid; + __u8 msg_lvl; + __u8 reserved[6]; + char msg[LKD_FW_ASCII_MSG_MAX_LEN]; +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + __le64 img_addr; /* address for next FW component load */ + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ + char reserved1[4]; +}; + +enum comms_reset_cause { + HL_RESET_CAUSE_UNKNOWN = 0, + HL_RESET_CAUSE_HEARTBEAT = 1, + HL_RESET_CAUSE_TDR = 2, +}; + +/* TODO: remove define after struct name is aligned on all projects */ +#define lkd_msg_comms lkd_fw_comms_msg + +/* this is the comms message descriptor */ +struct lkd_fw_comms_msg { + struct comms_msg_header header; + /* union for future expantions of new messages */ + union { + struct { + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + /* address for next FW component load */ + __le64 img_addr; + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + /* reserved memory size [MB] for FW/SVE */ + __le32 rsvd_mem_size_mb; + char reserved1[4]; + }; + struct { + __u8 reset_cause; + }; + struct { + __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ + }; + struct lkd_fw_binning_info binning_conf; + }; +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually prepare a + * space for it and in the status response provide + * the descriptor offset. The Offset of the next FW + * data component is a part of the descriptor + * structure. + * + * COMMS_DATA_RDY The FW data has been uploaded and is ready for + * validation. + * + * COMMS_EXEC Execute the next FW component. + * + * COMMS_RST_DEV Reset the device. + * + * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure + * devices. + * + * COMMS_SKIP_BMC Perform actions required for BMC-less servers. + * Do not wait for BMC response. + * + * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory + * space is allocated in a ELBI access only + * address range. + * + */ +enum comms_cmd { + COMMS_NOOP = 0, + COMMS_CLR_STS = 1, + COMMS_RST_STATE = 2, + COMMS_PREP_DESC = 3, + COMMS_DATA_RDY = 4, + COMMS_EXEC = 5, + COMMS_RST_DEV = 6, + COMMS_GOTO_WFE = 7, + COMMS_SKIP_BMC = 8, + COMMS_PREP_DESC_ELBI = 10, + COMMS_INVLD_LAST +}; + +#define COMMS_COMMAND_SIZE_SHIFT 0 +#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF +#define COMMS_COMMAND_CMD_SHIFT 27 +#define COMMS_COMMAND_CMD_MASK 0xF8000000 + +/* + * LKD command to FW register structure + * @size - FW component size + * @cmd - command from enum comms_cmd + */ +struct comms_command { + union { /* bit fields are only for FW use */ + struct { + u32 size :25; /* 32MB max. */ + u32 reserved :2; + enum comms_cmd cmd :5; /* 32 commands */ + }; + __le32 val; + }; +}; + +/* + * FW status + * + * COMMS_STS_NOOP Used to clear the status register and no actual + * status is provided. + * + * COMMS_STS_ACK Command has been received and recognized. + * + * COMMS_STS_OK Command execution has finished successfully. + * + * COMMS_STS_ERR Command execution was unsuccessful and resulted + * in error. + * + * COMMS_STS_VALID_ERR FW validation has failed. + * + * COMMS_STS_TIMEOUT_ERR Command execution has timed out. + */ +enum comms_sts { + COMMS_STS_NOOP = 0, + COMMS_STS_ACK = 1, + COMMS_STS_OK = 2, + COMMS_STS_ERR = 3, + COMMS_STS_VALID_ERR = 4, + COMMS_STS_TIMEOUT_ERR = 5, + COMMS_STS_INVLD_LAST +}; + +/* RAM types for FW components loading - defines the base address */ +enum comms_ram_types { + COMMS_SRAM = 0, + COMMS_DRAM = 1, +}; + +#define COMMS_STATUS_OFFSET_SHIFT 0 +#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF +#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 +#define COMMS_STATUS_RAM_TYPE_SHIFT 26 +#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 +#define COMMS_STATUS_STATUS_SHIFT 28 +#define COMMS_STATUS_STATUS_MASK 0xF0000000 + +/* + * FW status to LKD register structure + * @offset - an offset from the base of the ram_type shifted right by + * 2 bits (always aligned to 32 bits). + * Allows a maximum addressable offset of 256MB from RAM base. + * Example: for real offset in RAM of 0x800000 (8MB), the value + * in offset field is (0x800000 >> 2) = 0x200000. + * @ram_type - the RAM type that should be used for offset from + * enum comms_ram_types + * @status - status from enum comms_sts + */ +struct comms_status { + union { /* bit fields are only for FW use */ + struct { + u32 offset :26; + enum comms_ram_types ram_type :2; + enum comms_sts status :4; /* 16 statuses */ + }; + __le32 val; + }; +}; + +#define NAME_MAX_LEN 32 /* bytes */ +struct hl_module_data { + __u8 name[NAME_MAX_LEN]; + __u8 version[VERSION_MAX_LEN]; +}; + +/** + * struct hl_component_versions - versions associated with hl component. + * @struct_size: size of all the struct (including dynamic size of modules). + * @modules_offset: offset of the modules field in this struct. + * @component: version of the component itself. + * @fw_os: Firmware OS Version. + * @comp_name: Name of the component. + * @modules_counter: number of set bits in modules_mask. + * @reserved: reserved for future use. + * @modules: versions of the component's modules. Elborated explanation in + * struct cpucp_versions. + */ +struct hl_component_versions { + __le16 struct_size; + __le16 modules_offset; + __u8 component[VERSION_MAX_LEN]; + __u8 fw_os[VERSION_MAX_LEN]; + __u8 comp_name[NAME_MAX_LEN]; + __u8 modules_counter; + __u8 reserved[3]; + struct hl_module_data modules[]; +}; + +/* Max size of fit size */ +#define HL_FW_VERSIONS_FIT_SIZE 4096 + +#endif /* HL_BOOT_IF_H */ -- cgit v1.2.3 From 7c4130e6ddd709be2033a6635c91d445cb2baea5 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 8 Aug 2023 12:56:47 +0300 Subject: accel/habanalabs/gaudi2: handle eq health heartbeat check Add mechanism for fw eq health check. this will be done using two flows: using the heartbeat mechanism and raising a dedicated interrupt to indicate an eq failure like EQ full. This patch will add implementation for the eq heartbeat for gaudi2 asic. More info about the heartbeat mechanism: Expand the heartbeat mechanism to monitor a new event that will be sent from FW upon receiving heartbeat message. that way driver can know that the eq is working or not. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/linux/habanalabs/cpucp_if.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 4cdedb603ecb..a18fa81aad1f 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -33,6 +33,17 @@ #define PLL_MAP_MAX_BITS 128 #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, + EQ_EVENT_PWR_MODE_0, + EQ_EVENT_PWR_MODE_1, + EQ_EVENT_PWR_MODE_2, + EQ_EVENT_PWR_MODE_3, + EQ_EVENT_PWR_BRK_ENTRY, + EQ_EVENT_PWR_BRK_EXIT, + EQ_EVENT_HEARTBEAT, +}; + /* * info of the pkt queue pointers in the first async occurrence */ @@ -1143,6 +1154,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @interposer_version: Interposer version programmed in eFuse * @substrate_version: Substrate version programmed in eFuse + * @eq_health_check_supported: eq health check feature supported in FW. * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. * @fw_os_version: Firmware OS Version */ @@ -1169,7 +1181,7 @@ struct cpucp_info { __u8 xbar_binning_mask; __u8 interposer_version; __u8 substrate_version; - __u8 reserved2; + __u8 eq_health_check_supported; struct cpucp_security_info sec_info; __le32 fw_hbm_region_size; __u8 pll_map[PLL_MAP_LEN]; -- cgit v1.2.3 From 764bfd138f359423b299b7bf3fcbabb56b981ef5 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 23 Aug 2023 12:36:25 +0300 Subject: accel/habanalabs/gaudi2: add eq health check using irq This is the second patch for applying the eq health check mechanism which will add support for the interrupt flow for gaudi2 asic. More info about the interrupt mechanism: set a dedicated msix for the eq error interrupt, and add interrupt handler for it. when FW detects some issue with EQ like EQ_FULL, it'll raise that interrupt and driver should reset the device. Driver will inform the FW which msix index to use through the already existing handshake mechanism which will send msix info message to fw. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/linux/habanalabs/cpucp_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index a18fa81aad1f..84d74c4ee4d3 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1004,6 +1004,7 @@ enum cpucp_msi_type { CPUCP_NIC_PORT5_MSI_TYPE, CPUCP_NIC_PORT7_MSI_TYPE, CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_EVENT_QUEUE_ERR_MSI_TYPE, CPUCP_NUM_OF_MSI_TYPES }; -- cgit v1.2.3 From 9dca13141332e69fd657873194e77a1960fc9ab2 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 7 Sep 2023 14:43:01 +0300 Subject: accel/habanalabs: add fw status SHUTDOWN_PREP update hl_boot_if.h from specs to include CPU_BOOT_STATUS_FW_SHUTDOWN_PREP Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/linux/habanalabs/hl_boot_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index 7de8a5786a36..93366d5621fd 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -394,6 +394,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, /* Internal Security has been initialized, device can be accessed */ CPU_BOOT_STATUS_SECURITY_READY = 17, + /* FW component is preparing to shutdown and communication with host is not available */ + CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18, }; enum kmd_msg { -- cgit v1.2.3 From 7f1cd6fdd5872160da05098a84a94dee5e709e54 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 21 Sep 2023 14:52:01 +0300 Subject: accel/habanalabs: minor cosmetics update to cpucp_if.h - Update copyright years - Align comments Signed-off-by: Oded Gabbay Reviewed-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 84d74c4ee4d3..86ea7c63a0d2 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2020-2022 HabanaLabs, Ltd. + * Copyright 2020-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -668,18 +668,15 @@ enum pq_init_status { * Obsolete. * * CPUCP_PACKET_GENERIC_PASSTHROUGH - - * Generic opcode for all firmware info that is only passed to host - * through the LKD, without getting parsed there. + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. * * CPUCP_PACKET_ACTIVE_STATUS_SET - * LKD sends FW indication whether device is free or in use, this indication is reported * also to the BMC. * - * CPUCP_PACKET_REGISTER_INTERRUPTS - - * Packet to register interrupts indicating LKD is ready to receive events from FW. - * * CPUCP_PACKET_SOFT_RESET - - * Packet to perform soft-reset. + * Packet to perform soft-reset. * * CPUCP_PACKET_INTS_REGISTER - * Packet to inform FW that queues have been established and LKD is ready to receive @@ -750,9 +747,9 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED11, /* not used */ CPUCP_PACKET_RESERVED12, /* internal */ CPUCP_PACKET_RESERVED13, /* internal */ - CPUCP_PACKET_SOFT_RESET, /* internal */ - CPUCP_PACKET_INTS_REGISTER, /* internal */ - CPUCP_PACKET_ID_MAX /* must be last */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 -- cgit v1.2.3 From 74975b4f2836e3cc10eeb6c38a6da54311e1de5b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 Sep 2023 16:59:43 +0200 Subject: gpio: acpi: remove acpi_get_and_request_gpiod() With no more users, we can remove acpi_get_and_request_gpiod(). Signed-off-by: Bartosz Golaszewski Reviewed-by: From: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/gpio/consumer.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 6cc345440a5b..db2dfbae8edb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -606,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); - #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ #include @@ -625,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } -static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, - char *label) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ -- cgit v1.2.3 From a0fddaa0b5a587cc8d185f8802fe7e48493c43ed Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 15 Sep 2023 08:22:37 -0700 Subject: rtc: Add API function to return alarm time bound by hardware limit Add rtc_bound_alarmtime() to return the requested alarm timeout bound by the maxmum alarm timeout that is supported by a given RTC. Signed-off-by: Guenter Roeck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230915152238.1144706-2-linux@roeck-us.net --- include/linux/rtc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4c0bcbeb1f00..5f8e438a0312 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -225,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc. + */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) -- cgit v1.2.3 From e346fb6d774abf1d9a87d39b1e3eef0b7397d154 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 02:00:05 -0300 Subject: xattr: make the xattr array itself const As it is currently declared, the xattr_handler structs are const but the array containing their pointers is not. This patch makes it so that fs modules can place them in .rodata, which makes it harder for accidental/malicious modifications at runtime. Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20230930050033.41174-2-wedsonaf@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..bba22e25664d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1206,7 +1206,7 @@ struct super_block { #ifdef CONFIG_SECURITY void *s_security; #endif - const struct xattr_handler **s_xattr; + const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ -- cgit v1.2.3 From e01cc1e8c2ad73cebb980878ede5584e0f2688f7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 25 Sep 2023 16:50:23 +0200 Subject: locking/atomic: Add generic support for sync_try_cmpxchg() and its fallback Provide the generic sync_try_cmpxchg() function from the raw_ prefixed version, also adding explicit instrumentation. The patch amends existing scripts to generate sync_try_cmpxchg() locking primitive and its raw_sync_try_cmpxchg() fallback, while leaving existing macros from the try_cmpxchg() family unchanged. The target can define its own arch_sync_try_cmpxchg() to override the generic version of raw_sync_try_cmpxchg(). This allows the target to generate more optimal assembly than the generic version. Additionally, the patch renames two scripts to better reflect whet they really do. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Mark Rutland Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org --- include/linux/atomic/atomic-arch-fallback.h | 15 ++++++++++++++- include/linux/atomic/atomic-instrumented.h | 10 +++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index b83ef19da13d..5e95faa959c4 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 2fdd6702823fa842f9cea57a002e6e4476ae780c +// eec048affea735b8464f58e6d96992101f8f85f1 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..54d7bbe0aeaa 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// 2cc4bc990fef44d3836ec108f11b610f3f438184 -- cgit v1.2.3 From 9ae5c00ea2e600a8b823f9b95606dd244f3096bf Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:38 +0100 Subject: sched/numa: Document vma_numab_state fields Document the intended usage of the fields. [ mingo: Reformatted to take less vertical space & tidied it up. ] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-2-mgorman@techsingularity.net --- include/linux/mm_types.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e6..d7f042ec1f33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -551,8 +551,29 @@ struct vma_lock { }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; + + /* + * Time in jiffies when access_pids[] is reset to + * detect phase change behaviour: + */ unsigned long next_pid_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ unsigned long access_pids[2]; }; -- cgit v1.2.3 From f3a6c97940fbd25d6c84c2d5642338fc99a9b35b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:39 +0100 Subject: sched/numa: Rename vma_numab_state::access_pids[] => ::pids_active[], ::next_pid_reset => ::pids_active_reset The access_pids[] field name is somewhat ambiguous as no PIDs are accessed. Similarly, it's not clear that next_pid_reset is related to access_pids[]. Rename the fields to more accurately reflect their purpose. [ mingo: Rename in the comments too. ] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-3-mgorman@techsingularity.net --- include/linux/mm.h | 4 ++-- include/linux/mm_types.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..19fc73b02c9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1726,8 +1726,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d7f042ec1f33..e7571eca1131 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -559,10 +559,10 @@ struct vma_numab_state { unsigned long next_scan; /* - * Time in jiffies when access_pids[] is reset to + * Time in jiffies when pids_active[] is reset to * detect phase change behaviour: */ - unsigned long next_pid_reset; + unsigned long pids_active_reset; /* * Approximate tracking of PIDs that trapped a NUMA hinting @@ -574,7 +574,7 @@ struct vma_numab_state { * Window moves after next_pid_reset has expired approximately * every VMA_PID_RESET_PERIOD jiffies: */ - unsigned long access_pids[2]; + unsigned long pids_active[2]; }; /* -- cgit v1.2.3 From ed2da8b725b932b1e2b2f4835bb664d47ed03031 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:40 +0100 Subject: sched/numa: Trace decisions related to skipping VMAs NUMA balancing skips or scans VMAs for a variety of reasons. In preparation for completing scans of VMAs regardless of PID access, trace the reasons why a VMA was skipped. In a later patch, the tracing will be used to track if a VMA was forcibly scanned. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-4-mgorman@techsingularity.net --- include/linux/sched/numa_balancing.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..c127a1509e2f 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,6 +15,14 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); -- cgit v1.2.3 From 57ec42b9a1b7e4db4a1c2aa4fcc4eefe6d31bcb8 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 3 Oct 2023 15:53:39 +0800 Subject: i3c: Fix typo "Provisional ID" to "Provisioned ID" The MIPI I3C spec refers to a Provisioned ID, since it is (sometimes) provisioned at device manufacturing. Signed-off-by: Matt Johnston Acked-by: Rob Herring Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20231003075339.197099-1-matt@codeconstruct.com.au Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 2 +- include/linux/i3c/master.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 90fa83464f00..84ed77c04940 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -96,7 +96,7 @@ enum i3c_dcr { /** * struct i3c_device_info - I3C device information - * @pid: Provisional ID + * @pid: Provisioned ID * @bcr: Bus Characteristic Register * @dcr: Device Characteristic Register * @static_addr: static/I2C address diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..4fd6a777150f 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -166,7 +166,7 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) - * @pid: I3C Provisional ID exposed by the device. This is a unique identifier + * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address * @of_node: optional DT node in case the device has been described in the DT -- cgit v1.2.3 From 295d3c441226d004d1ed59c4fcf62d5dba18d9e1 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 02:00:33 -0300 Subject: net: move sockfs_xattr_handlers to .rodata This makes it harder for accidental or malicious changes to sockfs_xattr_handlers at runtime. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Richard Cochran Cc: netdev@vger.kernel.org Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20230930050033.41174-30-wedsonaf@gmail.com Acked-by: Jakub Kicinski Signed-off-by: Christian Brauner --- include/linux/pseudo_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index eceda1d1407a..730f77381d55 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,7 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; - const struct xattr_handler **xattr; + const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; }; -- cgit v1.2.3 From 909f4abd1097769d024c3a9c2e59c2fbe5d2d0c0 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 28 Sep 2023 00:15:23 -0700 Subject: iommu: Add new iommu op to create domains owned by userspace Introduce a new iommu_domain op to create domains owned by userspace, e.g. through IOMMUFD. These domains have a few different properties compares to kernel owned domains: - They may be PAGING domains, but created with special parameters. For instance aperture size changes/number of levels, different IOPTE formats, or other things necessary to make a vIOMMU work - We have to track all the memory allocations with GFP_KERNEL_ACCOUNT to make the cgroup sandbox stronger - Device-specialty domains, such as NESTED domains can be created by IOMMUFD. The new op clearly says the domain is being created by IOMMUFD, that the domain is intended for userspace use, and it provides a way to pass user flags or a driver specific uAPI structure to customize the created domain to exactly what the vIOMMU userspace driver requires. iommu drivers that cannot support VFIO/IOMMUFD should not support this op. This includes any driver that cannot provide a fully functional PAGING domain. This new op for now is only supposed to be used by IOMMUFD, hence no wrapper for it. IOMMUFD would call the callback directly. As for domain free, IOMMUFD would use iommu_domain_free(). Link: https://lore.kernel.org/r/20230928071528.26258-2-yi.l.liu@intel.com Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..3861d66b65c1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -234,7 +234,15 @@ struct iommu_iotlb_gather { * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. - * @domain_alloc: allocate iommu domain + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, a domain is returned. Upon failure, + * ERR_PTR must be returned. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -267,6 +275,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)(struct device *dev, u32 flags); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); -- cgit v1.2.3 From b7a5b537c55c088d891ae554103d1b281abef781 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:42 +0100 Subject: sched/numa: Complete scanning of partial VMAs regardless of PID activity NUMA Balancing skips VMAs when the current task has not trapped a NUMA fault within the VMA. If the VMA is skipped then mm->numa_scan_offset advances and a task that is trapping faults within the VMA may never fully update PTEs within the VMA. Force tasks to update PTEs for partially scanned PTEs. The VMA will be tagged for NUMA hints by some task but this removes some of the benefit of tracking PID activity within a VMA. A follow-on patch will mitigate this problem. The test cases and machines evaluated did not trigger the corner case so the performance results are neutral with only small changes within the noise from normal test-to-test variance. However, the next patch makes the corner case easier to trigger. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-6-mgorman@techsingularity.net --- include/linux/sched/numa_balancing.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index c127a1509e2f..7dcc0bdfddbb 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -21,6 +21,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_INACCESSIBLE, NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, }; #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From f169c62ff7cd1acf8bac8ae17bfeafa307d9e6fa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:43 +0100 Subject: sched/numa: Complete scanning of inactive VMAs when there is no alternative VMAs are skipped if there is no recent fault activity but this represents a chicken-and-egg problem as there may be no fault activity if the PTEs are never updated to trap NUMA hints. There is an indirect reliance on scanning to be forced early in the lifetime of a task but this may fail to detect changes in phase behaviour. Force inactive VMAs to be scanned when all other eligible VMAs have been updated within the same scan sequence. Test results in general look good with some changes in performance, both negative and positive, depending on whether the additional scanning and faulting was beneficial or not to the workload. The autonuma benchmark workload NUMA01_THREADLOCAL was picked for closer examination. The workload creates two processes with numerous threads and thread-local storage that is zero-filled in a loop. It exercises the corner case where unrelated threads may skip VMAs that are thread-local to another thread and still has some VMAs that inactive while the workload executes. The VMA skipping activity frequency with and without the patch: 6.6.0-rc2-sched-numabtrace-v1 ============================= 649 reason=scan_delay 9,094 reason=unsuitable 48,915 reason=shared_ro 143,919 reason=inaccessible 193,050 reason=pid_inactive 6.6.0-rc2-sched-numabselective-v1 ============================= 146 reason=seq_completed 622 reason=ignore_pid_inactive 624 reason=scan_delay 6,570 reason=unsuitable 16,101 reason=shared_ro 27,608 reason=inaccessible 41,939 reason=pid_inactive Note that with the patch applied, the PID activity is ignored (ignore_pid_inactive) to ensure a VMA with some activity is completely scanned. In addition, a small number of VMAs are scanned when no other eligible VMA is available during a single scan window (seq_completed). The number of times a VMA is skipped due to no PID activity from the scanning task (pid_inactive) drops dramatically. It is expected that this will increase the number of PTEs updated for NUMA hinting faults as well as hinting faults but these represent PTEs that would otherwise have been missed. The tradeoff is scan+fault overhead versus improving locality due to migration. On a 2-socket Cascade Lake test machine, the time to complete the workload is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Min elsp-NUMA01_THREADLOCAL 174.22 ( 0.00%) 117.64 ( 32.48%) Amean elsp-NUMA01_THREADLOCAL 175.68 ( 0.00%) 123.34 * 29.79%* Stddev elsp-NUMA01_THREADLOCAL 1.20 ( 0.00%) 4.06 (-238.20%) CoeffVar elsp-NUMA01_THREADLOCAL 0.68 ( 0.00%) 3.29 (-381.70%) Max elsp-NUMA01_THREADLOCAL 177.18 ( 0.00%) 128.03 ( 27.74%) The time to complete the workload is reduced by almost 30%: 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 / Duration User 91201.80 63506.64 Duration System 2015.53 1819.78 Duration Elapsed 1234.77 868.37 In this specific case, system CPU time was not increased but it's not universally true. From vmstat, the NUMA scanning and fault activity is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Ops NUMA base-page range updates 64272.00 26374386.00 Ops NUMA PTE updates 36624.00 55538.00 Ops NUMA PMD updates 54.00 51404.00 Ops NUMA hint faults 15504.00 75786.00 Ops NUMA hint local faults % 14860.00 56763.00 Ops NUMA hint local percent 95.85 74.90 Ops NUMA pages migrated 1629.00 6469222.00 Both the number of PTE updates and hint faults is dramatically increased. While this is superficially unfortunate, it represents ranges that were simply skipped without the patch. As a result of the scanning and hinting faults, many more pages were also migrated but as the time to completion is reduced, the overhead is offset by the gain. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-7-mgorman@techsingularity.net --- include/linux/mm_types.h | 6 ++++++ include/linux/sched/numa_balancing.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e7571eca1131..589f31ef2e84 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -575,6 +575,12 @@ struct vma_numab_state { * every VMA_PID_RESET_PERIOD jiffies: */ unsigned long pids_active[2]; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 7dcc0bdfddbb..b69afb8630db 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -22,6 +22,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, }; #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From 8527ca7735ef4cdad32c45853b0138f46ab2df58 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 14:41:21 -0700 Subject: net: skbuff: fix kernel-doc typos Correct punctuation and drop an extraneous word. Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231008214121.25940-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..97bfef071255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1309,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2016,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** -- cgit v1.2.3 From 5247dbf16cee4e83eb89e4d3b87bd5e79c5d1655 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Mon, 9 Oct 2023 19:16:33 +0800 Subject: net/core: Introduce netdev_core_stats_inc() Although there is a kfree_skb_reason() helper function that can be used to find the reason why this skb is dropped, but most callers didn't increase one of rx_dropped, tx_dropped, rx_nohandler and rx_otherhost_dropped. For the users, people are more concerned about why the dropped in ip is increasing. Introduce netdev_core_stats_inc() for trace the caller of dev_core_stats_*_inc(). Also, add __code to netdev_core_stats_alloc(), as it's called with small probability. And add noinline make sure netdev_core_stats_inc was never inlined. Signed-off-by: Yajun Deng Suggested-by: Alexander Lobakin Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e070a4540fba..11d704bfec9b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4002,32 +4002,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, -- cgit v1.2.3 From acd6199f195d6de814ac4090ce0864a613b1580e Mon Sep 17 00:00:00 2001 From: Wentong Wu Date: Mon, 9 Oct 2023 14:33:22 +0800 Subject: usb: Add support for Intel LJCA device Implements the USB part of Intel USB-I2C/GPIO/SPI adapter device named "La Jolla Cove Adapter" (LJCA). The communication between the various LJCA module drivers and the hardware will be muxed/demuxed by this driver. Three modules ( I2C, GPIO, and SPI) are supported currently. Each sub-module of LJCA device is identified by type field within the LJCA message header. The sub-modules of LJCA can use ljca_transfer() to issue a transfer between host and hardware. And ljca_register_event_cb is exported to LJCA sub-module drivers for hardware event subscription. The minimum code in ASL that covers this board is Scope (\_SB.PCI0.DWC3.RHUB.HS01) { Device (GPIO) { Name (_ADR, Zero) Name (_STA, 0x0F) } Device (I2C) { Name (_ADR, One) Name (_STA, 0x0F) } Device (SPI) { Name (_ADR, 0x02) Name (_STA, 0x0F) } } Signed-off-by: Wentong Wu Reviewed-by: Sakari Ailus Reviewed-by: Andi Shyti Tested-by: Hans de Goede Reviewed-by: Oliver Neukum Link: https://lore.kernel.org/r/1696833205-16716-2-git-send-email-wentong.wu@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ljca.h | 145 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 include/linux/usb/ljca.h (limited to 'include/linux') diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h new file mode 100644 index 000000000000..47661feda96c --- /dev/null +++ b/include/linux/usb/ljca.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef _LINUX_USB_LJCA_H_ +#define _LINUX_USB_LJCA_H_ + +#include +#include +#include +#include + +#define LJCA_MAX_GPIO_NUM 64 + +#define auxiliary_dev_to_ljca_client(auxiliary_dev) \ + container_of(auxiliary_dev, struct ljca_client, auxdev) + +struct ljca_adapter; + +/** + * typedef ljca_event_cb_t - event callback function signature + * + * @context: the execution context of who registered this callback + * @cmd: the command from device for this event + * @evt_data: the event data payload + * @len: the event data payload length + * + * The callback function is called in interrupt context and the data payload is + * only valid during the call. If the user needs later access of the data, it + * must copy it. + */ +typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len); + +/** + * struct ljca_client - represent a ljca client device + * + * @type: ljca client type + * @id: ljca client id within same client type + * @link: ljca client on the same ljca adapter + * @auxdev: auxiliary device object + * @adapter: ljca adapter the ljca client sit on + * @context: the execution context of the event callback + * @event_cb: ljca client driver register this callback to get + * firmware asynchronous rx buffer pending notifications + * @event_cb_lock: spinlock to protect event callback + */ +struct ljca_client { + u8 type; + u8 id; + struct list_head link; + struct auxiliary_device auxdev; + struct ljca_adapter *adapter; + + void *context; + ljca_event_cb_t event_cb; + /* lock to protect event_cb */ + spinlock_t event_cb_lock; +}; + +/** + * struct ljca_gpio_info - ljca gpio client device info + * + * @num: ljca gpio client device pin number + * @valid_pin_map: ljca gpio client device valid pin mapping + */ +struct ljca_gpio_info { + unsigned int num; + DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM); +}; + +/** + * struct ljca_i2c_info - ljca i2c client device info + * + * @id: ljca i2c client device identification number + * @capacity: ljca i2c client device capacity + * @intr_pin: ljca i2c client device interrupt pin number if exists + */ +struct ljca_i2c_info { + u8 id; + u8 capacity; + u8 intr_pin; +}; + +/** + * struct ljca_spi_info - ljca spi client device info + * + * @id: ljca spi client device identification number + * @capacity: ljca spi client device capacity + */ +struct ljca_spi_info { + u8 id; + u8 capacity; +}; + +/** + * ljca_register_event_cb - register a callback function to receive events + * + * @client: ljca client device + * @event_cb: callback function + * @context: execution context of event callback + * + * Return: 0 in case of success, negative value in case of error + */ +int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, void *context); + +/** + * ljca_unregister_event_cb - unregister the callback function for an event + * + * @client: ljca client device + */ +void ljca_unregister_event_cb(struct ljca_client *client); + +/** + * ljca_transfer - issue a LJCA command and wait for a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device; it should + * be 0 when obuf is NULL + * @ibuf: any data associated with the response will be copied here; it can be + * NULL if the user doesn't need the response data + * @ibuf_len: must be initialized to the input buffer size + * + * Return: the actual length of response data for success, negative value for errors + */ +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len); + +/** + * ljca_transfer_noack - issue a LJCA command without a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device + * + * Return: 0 for success, negative value for errors + */ +int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len); + +#endif -- cgit v1.2.3 From c4dd854f740c21ae8dd9903fc67969c5497cb14b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 25 Sep 2023 17:28:39 +0100 Subject: cpu-hotplug: Provide prototypes for arch CPU registration Provide common prototypes for arch_register_cpu() and arch_unregister_cpu(). These are called by acpi_processor.c, with weak versions, so the prototype for this is already set. It is generally not necessary for function prototypes to be conditional on preprocessor macros. Some architectures (e.g. Loongarch) are missing the prototype for this, and rather than add it to Loongarch's asm/cpu.h, do the job once for everyone. Since this covers everyone, remove the now unnecessary prototypes in asm/cpu.h, and therefore remove the 'static' from one of ia64's arch_register_cpu() definitions. [ tglx: Bring back the ia64 part and remove the ACPI prototypes ] Signed-off-by: Russell King (Oracle) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1qkoRr-0088Q8-Da@rmk-PC.armlinux.org.uk --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..eb768a866fe3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); -- cgit v1.2.3 From 5987279373446e97206a7078b2229446ba871ea0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 5 Oct 2023 19:50:29 -0500 Subject: iio: event: add optional event label support This adds a new optional field to struct iio_info to allow drivers to specify a label for the event. This is useful for cases where there are many events or the event attribute name is not descriptive enough or where an event doesn't have any other attributes. The implementation is based on the existing label support for channels. So either all events of a device have a label attribute or none do. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20231005-ad2s1210-mainline-v4-12-ec00746840fc@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 7bfa1b9bc8a2..d0ce3b71106a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -427,6 +427,8 @@ struct iio_trigger; /* forward declaration */ * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. + * @read_event_label: function to request label name for a specified label, + * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. * @update_scan_mode: function to configure device and scan buffer when @@ -511,6 +513,12 @@ struct iio_info { enum iio_event_direction dir, enum iio_event_info info, int val, int val2); + int (*read_event_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + enum iio_event_type type, + enum iio_event_direction dir, + char *label); + int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, -- cgit v1.2.3 From 21ca59b365c091d583f36ac753eaa8baf947be6f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 28 Oct 2021 12:31:14 +0200 Subject: binfmt_misc: enable sandboxed mounts Enable unprivileged sandboxes to create their own binfmt_misc mounts. This is based on Laurent's work in [1] but has been significantly reworked to fix various issues we identified in earlier versions. While binfmt_misc can currently only be mounted in the initial user namespace, binary types registered in this binfmt_misc instance are available to all sandboxes (Either by having them installed in the sandbox or by registering the binary type with the F flag causing the interpreter to be opened right away). So binfmt_misc binary types are already delegated to sandboxes implicitly. However, while a sandbox has access to all registered binary types in binfmt_misc a sandbox cannot currently register its own binary types in binfmt_misc. This has prevented various use-cases some of which were already outlined in [1] but we have a range of issues associated with this (cf. [3]-[5] below which are just a small sample). Extend binfmt_misc to be mountable in non-initial user namespaces. Similar to other filesystem such as nfsd, mqueue, and sunrpc we use keyed superblock management. The key determines whether we need to create a new superblock or can reuse an already existing one. We use the user namespace of the mount as key. This means a new binfmt_misc superblock is created once per user namespace creation. Subsequent mounts of binfmt_misc in the same user namespace will mount the same binfmt_misc instance. We explicitly do not create a new binfmt_misc superblock on every binfmt_misc mount as the semantics for load_misc_binary() line up with the keying model. This also allows us to retrieve the relevant binfmt_misc instance based on the caller's user namespace which can be done in a simple (bounded to 32 levels) loop. Similar to the current binfmt_misc semantics allowing access to the binary types in the initial binfmt_misc instance we do allow sandboxes access to their parent's binfmt_misc mounts if they do not have created a separate binfmt_misc instance. Overall, this will unblock the use-cases mentioned below and in general will also allow to support and harden execution of another architecture's binaries in tight sandboxes. For instance, using the unshare binary it possible to start a chroot of another architecture and configure the binfmt_misc interpreter without being root to run the binaries in this chroot and without requiring the host to modify its binary type handlers. Henning had already posted a few experiments in the cover letter at [1]. But here's an additional example where an unprivileged container registers qemu-user-static binary handlers for various binary types in its separate binfmt_misc mount and is then seamlessly able to start containers with a different architecture without affecting the host: root [lxc monitor] /var/snap/lxd/common/lxd/containers f1 1000000 \_ /sbin/init 1000000 \_ /lib/systemd/systemd-journald 1000000 \_ /lib/systemd/systemd-udevd 1000100 \_ /lib/systemd/systemd-networkd 1000101 \_ /lib/systemd/systemd-resolved 1000000 \_ /usr/sbin/cron -f 1000103 \_ /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only 1000000 \_ /usr/bin/python3 /usr/bin/networkd-dispatcher --run-startup-triggers 1000104 \_ /usr/sbin/rsyslogd -n -iNONE 1000000 \_ /lib/systemd/systemd-logind 1000000 \_ /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 vt220 1000107 \_ dnsmasq --conf-file=/dev/null -u lxc-dnsmasq --strict-order --bind-interfaces --pid-file=/run/lxc/dnsmasq.pid --liste 1000000 \_ [lxc monitor] /var/lib/lxc f1-s390x 1100000 \_ /usr/bin/qemu-s390x-static /sbin/init 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-journald 1100000 \_ /usr/bin/qemu-s390x-static /usr/sbin/cron -f 1100103 \_ /usr/bin/qemu-s390x-static /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-ac 1100000 \_ /usr/bin/qemu-s390x-static /usr/bin/python3 /usr/bin/networkd-dispatcher --run-startup-triggers 1100104 \_ /usr/bin/qemu-s390x-static /usr/sbin/rsyslogd -n -iNONE 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-logind 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/0 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/1 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/2 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/3 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-udevd [1]: https://lore.kernel.org/all/20191216091220.465626-1-laurent@vivier.eu [2]: https://discuss.linuxcontainers.org/t/binfmt-misc-permission-denied [3]: https://discuss.linuxcontainers.org/t/lxd-binfmt-support-for-qemu-static-interpreters [4]: https://discuss.linuxcontainers.org/t/3-1-0-binfmt-support-service-in-unprivileged-guest-requires-write-access-on-hosts-proc-sys-fs-binfmt-misc [5]: https://discuss.linuxcontainers.org/t/qemu-user-static-not-working-4-11 Link: https://lore.kernel.org/r/20191216091220.465626-2-laurent@vivier.eu (origin) Link: https://lore.kernel.org/r/20211028103114.2849140-2-brauner@kernel.org (v1) Cc: Sargun Dhillon Cc: Serge Hallyn Cc: Jann Horn Cc: Henning Schild Cc: Andrei Vagin Cc: Al Viro Cc: Laurent Vivier Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Laurent Vivier Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner Signed-off-by: Kees Cook --- /* v2 */ - Serge Hallyn : - Use GFP_KERNEL_ACCOUNT for userspace triggered allocations when a new binary type handler is registered. - Christian Brauner : - Switch authorship to me. I refused to do that earlier even though Laurent said I should do so because I think it's genuinely bad form. But by now I have changed so many things that it'd be unfair to blame Laurent for any potential bugs in here. - Add more comments that explain what's going on. - Rename functions while changing them to better reflect what they are doing to make the code easier to understand. - In the first version when a specific binary type handler was removed either through a write to the entry's file or all binary type handlers were removed by a write to the binfmt_misc mount's status file all cleanup work happened during inode eviction. That includes removal of the relevant entries from entry list. While that works fine I disliked that model after thinking about it for a bit. Because it means that there was a window were someone has already removed a or all binary handlers but they could still be safely reached from load_misc_binary() when it has managed to take the read_lock() on the entries list while inode eviction was already happening. Again, that perfectly benign but it's cleaner to remove the binary handler from the list immediately meaning that ones the write to then entry's file or the binfmt_misc status file returns the binary type cannot be executed anymore. That gives stronger guarantees to the user. --- include/linux/binfmts.h | 10 ++++++++++ include/linux/user_namespace.h | 8 ++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8d51f69f9f5e..70f97f685bff 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,6 +90,16 @@ struct linux_binfmt { #endif } __randomize_layout; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc { + struct list_head entries; + rwlock_t entries_lock; + bool enabled; +} __randomize_layout; + +extern struct binfmt_misc init_binfmt_misc; +#endif + extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..6030a8235617 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,6 +65,10 @@ enum rlimit_type { UCOUNT_RLIMIT_COUNTS, }; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc; +#endif + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -102,6 +106,10 @@ struct user_namespace { struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; + +#if IS_ENABLED(CONFIG_BINFMT_MISC) + struct binfmt_misc *binfmt_misc; +#endif } __randomize_layout; struct ucounts { -- cgit v1.2.3 From f6d7f050e258e3c71e310f5167c4d65bbefaeb31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Oct 2023 19:31:00 +0300 Subject: spi: Don't use flexible array in struct spi_message definition The struct spi_message can be embedded into another structures. With that the flexible array might be problematic as sparse complains about it, although there is no real issue in the code because when the message is embedded it doesn't use flexible array member. That memeber is a private to spi_message_alloc() API, so move it to that API in a form of an inherited data type. Reported-by: Marc Kleine-Budde Fixes: 75e308ffc4f0 ("spi: Use struct_size() helper")) Closes: https://lore.kernel.org/r/20231009-onshore-underage-c58415adfd92-mkl@pengutronix.de Signed-off-by: Andy Shevchenko Reviewed-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20231010163100.89734-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7f8b478fdeb3..487da1f6e4b7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1086,8 +1086,6 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed * @prepared: spi_prepare_message was called for the this message - * @t: for use with spi_message_alloc() when message and transfers have - * been allocated together * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1142,9 +1140,6 @@ struct spi_message { /* List of spi_res resources when the SPI message is processed */ struct list_head resources; - - /* For embedding transfers into the memory of the message */ - struct spi_transfer t[]; }; static inline void spi_message_init_no_memset(struct spi_message *m) @@ -1203,17 +1198,21 @@ struct spi_transfer *xfers, unsigned int num_xfers) */ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { - struct spi_message *m; + struct spi_message_with_transfers { + struct spi_message m; + struct spi_transfer t[]; + } *mwt; + unsigned i; + + mwt = kzalloc(struct_size(mwt, t, ntrans), flags); + if (!mwt) + return NULL; - m = kzalloc(struct_size(m, t, ntrans), flags); - if (m) { - unsigned i; + spi_message_init_no_memset(&mwt->m); + for (i = 0; i < ntrans; i++) + spi_message_add_tail(&mwt->t[i], &mwt->m); - spi_message_init_no_memset(m); - for (i = 0; i < ntrans; i++) - spi_message_add_tail(&m->t[i], m); - } - return m; + return &mwt->m; } static inline void spi_message_free(struct spi_message *m) -- cgit v1.2.3 From 9d77eb52778499a97cab662aa96de4e2e4fa72d3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:08 +0200 Subject: nvme-keyring: register '.nvme' keyring Register a '.nvme' keyring to hold keys for TLS and DH-HMAC-CHAP and add a new config option NVME_KEYRING. We need a separate keyring for NVMe as the configuration is done via individual commands (eg for configfs), and the usual per-session or per-process keyrings can't be used. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme-keyring.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/nvme-keyring.h (limited to 'include/linux') diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h new file mode 100644 index 000000000000..32bd264a71e6 --- /dev/null +++ b/include/linux/nvme-keyring.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#ifndef _NVME_KEYRING_H +#define _NVME_KEYRING_H + +#ifdef CONFIG_NVME_KEYRING + +key_serial_t nvme_keyring_id(void); +int nvme_keyring_init(void); +void nvme_keyring_exit(void); + +#else + +static inline key_serial_t nvme_keyring_id(void) +{ + return 0; +} +static inline int nvme_keyring_init(void) +{ + return 0; +} +static inline void nvme_keyring_exit(void) {} + +#endif /* !CONFIG_NVME_KEYRING */ +#endif /* _NVME_KEYRING_H */ -- cgit v1.2.3 From 646f45b23218c7a97a84259d8eeb22dad5711fc8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:10 +0200 Subject: nvme: add TCP TSAS definitions Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..a7ba74babad7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -108,6 +108,13 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; +/* TSAS SECTYPE for TCP transport */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ +}; + #define NVME_AQ_DEPTH 32 #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) @@ -1493,6 +1500,9 @@ struct nvmf_disc_rsp_page_entry { __u16 pkey; __u8 resv10[246]; } rdma; + struct tcp { + __u8 sectype; + } tcp; } tsas; }; -- cgit v1.2.3 From a86062aac34d100a3117c0fff91ee1892ebfb460 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:11 +0200 Subject: nvme-tcp: add definitions for TLS cipher suites Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme-tcp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..e07e8978d691 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -18,6 +18,12 @@ enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, }; +enum nvme_tcp_tls_cipher { + NVME_TCP_TLS_CIPHER_INVALID = 0, + NVME_TCP_TLS_CIPHER_SHA256 = 1, + NVME_TCP_TLS_CIPHER_SHA384 = 2, +}; + enum nvme_tcp_fatal_error_status { NVME_TCP_FES_INVALID_PDU_HDR = 0x01, NVME_TCP_FES_PDU_SEQ_ERR = 0x02, -- cgit v1.2.3 From 501cc6f4aca8dc0958c4d9716f0233ba7cff4830 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:12 +0200 Subject: nvme-keyring: implement nvme_tls_psk_default() Implement a function to select the preferred PSK for TLS. A 'retained' PSK should be preferred over a 'generated' PSK, and SHA-384 should be preferred to SHA-256. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme-keyring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 32bd264a71e6..4efea9dd967c 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -8,12 +8,20 @@ #ifdef CONFIG_NVME_KEYRING +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn); + key_serial_t nvme_keyring_id(void); int nvme_keyring_init(void); void nvme_keyring_exit(void); #else +static inline key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + return 0; +} static inline key_serial_t nvme_keyring_id(void) { return 0; -- cgit v1.2.3 From 037c34318a479294cdb98dc8018edd5d191b68c0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:13 +0200 Subject: security/keys: export key_lookup() For in-kernel consumers one cannot readily assign a user (eg when running from a workqueue), so the normal key search permissions cannot be applied. This patch exports the 'key_lookup()' function for a simple lookup of keys without checking for permissions. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Acked-by: David Howells Signed-off-by: Keith Busch --- include/linux/key.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..943a432da3ae 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -515,6 +515,7 @@ extern void key_init(void); #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) +#define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 39c6eed1f61594f737160e498d29673edbd9eefd Mon Sep 17 00:00:00 2001 From: Maciej Wieczor-Retman Date: Tue, 10 Oct 2023 12:42:36 +0200 Subject: x86/resctrl: Rename arch_has_sparse_bitmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename arch_has_sparse_bitmaps to arch_has_sparse_bitmasks to ensure consistent terminology throughout resctrl. Suggested-by: Reinette Chatre Signed-off-by: Maciej Wieczor-Retman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ilpo Järvinen Reviewed-by: Peter Newman Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Tested-by: Peter Newman Link: https://lore.kernel.org/r/e330fcdae873ef1a831e707025a4b70fa346666e.1696934091.git.maciej.wieczor-retman@intel.com --- include/linux/resctrl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8334eeacfec5..66942d7fba7f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -94,7 +94,7 @@ struct rdt_domain { * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +102,7 @@ struct resctrl_cache { unsigned int cbm_len; unsigned int min_cbm_bits; unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; + bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; }; -- cgit v1.2.3 From fefba7d1ae198dcbf8b3b432de46a4e29f8dbd8c Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:04 +0200 Subject: bpf: Propagate modified uaddrlen from cgroup sockaddr programs As prep for adding unix socket support to the cgroup sockaddr hooks, let's propagate the sockaddr length back to the caller after running a bpf cgroup sockaddr hook program. While not important for AF_INET or AF_INET6, the sockaddr length is important when working with AF_UNIX sockaddrs as the size of the sockaddr cannot be determined just from the address family or the sockaddr's contents. __cgroup_bpf_run_filter_sock_addr() is modified to take the uaddrlen as an input/output argument. After running the program, the modified sockaddr length is stored in the uaddrlen pointer. Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-3-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup.h | 73 +++++++++++++++++++++++----------------------- include/linux/filter.h | 1 + 2 files changed, 38 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..31561e789715 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -120,6 +120,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +231,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +257,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +277,29 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +478,24 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/filter.h b/include/linux/filter.h index ff7ecc89d3dd..bcd2bc15ff56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1335,6 +1335,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { -- cgit v1.2.3 From 859051dd165ec6cc915f0f2114699021144fd249 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:06 +0200 Subject: bpf: Implement cgroup sockaddr hooks for unix sockets These hooks allows intercepting connect(), getsockname(), getpeername(), sendmsg() and recvmsg() for unix sockets. The unix socket hooks get write access to the address length because the address length is not fixed when dealing with unix sockets and needs to be modified when a unix socket address is modified by the hook. Because abstract socket unix addresses start with a NUL byte, we cannot recalculate the socket address in kernelspace after running the hook by calculating the length of the unix socket path using strlen(). These hooks can be used when users want to multiplex syscall to a single unix socket to multiple different processes behind the scenes by redirecting the connect() and other syscalls to process specific sockets. We do not implement support for intercepting bind() because when using bind() with unix sockets with a pathname address, this creates an inode in the filesystem which must be cleaned up. If we rewrite the address, the user might try to clean up the wrong file, leaking the socket in the filesystem where it is never cleaned up. Until we figure out a solution for this (and a use case for intercepting bind()), we opt to not allow rewriting the sockaddr in bind() calls. We also implement recvmsg() support for connected streams so that after a connect() that is modified by a sockaddr hook, any corresponding recmvsg() on the connected socket can also be modified to make the connected program think it is connected to the "intended" remote. Reviewed-by: Kuniyuki Iwashima Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-5-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup-defs.h | 5 +++++ include/linux/bpf-cgroup.h | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 31561e789715..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -289,18 +294,27 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) + #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) + /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by * sk_to_full_sk(). @@ -492,10 +506,13 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -- cgit v1.2.3 From 0a779003213b589d7b3eef72f69e19a30f603ebc Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 9 Oct 2023 15:37:51 +0200 Subject: netdev: make napi_schedule return bool on NAPI successful schedule Change napi_schedule to return a bool on NAPI successful schedule. This might be useful for some driver to do additional steps after a NAPI has been scheduled. Suggested-by: Eric Dumazet Signed-off-by: Christian Marangi Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231009133754.9834-2-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 11d704bfec9b..2874770a0a74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -490,11 +490,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** -- cgit v1.2.3 From 73382e919f3d938554dadd01d95760f90d1c25c1 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 9 Oct 2023 15:37:52 +0200 Subject: netdev: replace napi_reschedule with napi_schedule Now that napi_schedule return a bool, we can drop napi_reschedule that does the same exact function. The function comes from a very old commit bfe13f54f502 ("ibm_emac: Convert to use napi_struct independent of struct net_device") and the purpose is actually deprecated in favour of different logic. Convert every user of napi_reschedule to napi_schedule. Signed-off-by: Christian Marangi Acked-by: Jeff Johnson # ath10k Acked-by: Nick Child # ibm Acked-by: Marc Kleine-Budde # for can/dev/rx-offload.c Reviewed-by: Eric Dumazet Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/20231009133754.9834-3-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2874770a0a74..ae553f886796 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -516,16 +516,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context -- cgit v1.2.3 From b9f29205c0182a2059b4dfa2883db5ef423574d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2023 15:14:27 -0700 Subject: iosys-map: fix kernel-doc typos Correct spelling of "beginning". Signed-off-by: Randy Dunlap Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230930221428.18463-2-rdunlap@infradead.org --- include/linux/iosys-map.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index cb71aa616bd3..1b06d074ade0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -426,7 +426,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_rd_field - Read a member from a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read @@ -494,7 +494,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_wr_field - Write to a member of a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read -- cgit v1.2.3 From 668706b10c9b8181a53bd8881a77bb81b328ab33 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Oct 2023 23:26:35 +0300 Subject: gpiolib: provide gpio_device_find_by_fwnode() One of the ways of looking up GPIO devices is using their fwnode. Provide a helper for that to avoid every user implementing their own matching function. Reviewed-by: Dipen Patel Tested-by: Dipen Patel Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20231010151709.4104747-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f8ad7f40100c..ae4162d3f1d3 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -611,6 +611,7 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From e001d1447cd4585d7f23a44ff668ba2bc624badb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 15:24:17 +0300 Subject: fs: factor out vfs_parse_monolithic_sep() helper Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), so filesystems could use it with a custom option separator callback. Acked-by: Christian Brauner Signed-off-by: Amir Goldstein --- include/linux/fs_context.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 96332db693d5..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -136,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From 13cc9ee8f8ed58e563294d87d74a62006be40f21 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Oct 2023 13:09:02 -0400 Subject: cgroup: Fix incorrect css_set_rwsem reference in comment Since commit f0d9a5f17575 ("cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock"), css_set_rwsem has been replaced by css_set_lock. That commit, however, missed the css_set_rwsem reference in include/linux/cgroup-defs.h. Fix that by changing it to css_set_lock as well. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f1b3151ac30b..265da00a1a8b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -238,7 +238,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; -- cgit v1.2.3 From f06cc667f79909e9175460b167c277b7c64d3df0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Oct 2023 23:04:25 +0200 Subject: perf: Optimize perf_cgroup_switch() Namhyung reported that bd2756811766 ("perf: Rewrite core context handling") regresses context switch overhead when perf-cgroup is in use together with 'slow' PMUs like uncore. Specifically, perf_cgroup_switch()'s perf_ctx_disable() / ctx_sched_out() etc.. all iterate the full list of active PMUs for that CPU, even if they don't have cgroup events. Previously there was cgrp_cpuctx_list which linked the relevant PMUs together, but that got lost in the rework. Instead of re-instruducing a similar list, let the perf_event_pmu_context iteration skip those that do not have cgroup events. This avoids growing multiple versions of the perf_event_pmu_context iteration. Measured performance (on a slightly different patch): Before) $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB # Running 'sched/pipe' benchmark: # Executed 10000 pipe operations between two processes Total time: 0.901 [sec] 90.128700 usecs/op 11095 ops/sec After) $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB # Running 'sched/pipe' benchmark: # Executed 10000 pipe operations between two processes Total time: 0.065 [sec] 6.560100 usecs/op 152436 ops/sec Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: Namhyung Kim Debugged-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231009210425.GC6307@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f31f962a6445..0367d748fae0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -878,6 +878,7 @@ struct perf_event_pmu_context { unsigned int embedded : 1; unsigned int nr_events; + unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; -- cgit v1.2.3 From f995443f01b4dbcce723539b99050ce69b319e58 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Oct 2023 16:31:58 +0200 Subject: locking/seqlock: Simplify SEQCOUNT_LOCKNAME() 1. Kill the "lockmember" argument. It is always s->lock plus __seqprop_##lockname##_sequence() already uses s->lock and ignores "lockmember". 2. Kill the "lock_acquire" argument. __seqprop_##lockname##_sequence() can use the same "lockbase" prefix for _lock and _unlock. Apart from line numbers, gcc -E outputs the same code. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. McKenney Link: https://lore.kernel.org/r/20231012143158.GA16133@redhat.com --- include/linux/seqlock.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index af518e4d0c6b..7e7109dbc3f5 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -191,11 +191,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -216,7 +214,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +240,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -271,10 +269,10 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t -- cgit v1.2.3 From e6115c6f7a0ce3388cc60b69a284facf78b5dbfd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Oct 2023 16:32:27 +0200 Subject: locking/seqlock: Change __seqprop() to return the function pointer This simplifies the macro and makes it easy to add the new seqprop's with 2 or more args. Plus this way we do not lose the type info, the (void*) type cast is no longer needed. And the latter reveals the problem: a lot of seqcount_t helpers pass the "const seqcount_t *s" argument to __seqprop_ptr(seqcount_t *s) but (before this patch) "(void *)(s)" masked the problem. So this patch changes __seqprop_ptr() and __seqprop_##lockname##_ptr() to accept the "const LOCKNAME *s" argument. This is not nice either, they need to drop the constness on return because these helpers are used by both the readers and writers, but at least it is clear what's going on. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. McKenney Link: https://lore.kernel.org/r/20231012143227.GA16143@redhat.com --- include/linux/seqlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7e7109dbc3f5..4b8dcd3a0d93 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,9 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ { \ - return &s->seqcount; \ + return (void *)&s->seqcount; /* drop const */ \ } \ \ static __always_inline unsigned \ @@ -247,9 +247,9 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) { - return s; + return (void *)s; /* drop const */ } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -292,19 +292,19 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier -- cgit v1.2.3 From 7cbabed16464ad980c8d80b601559b36e1bc0b0c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 31 Jul 2023 05:25:36 +0300 Subject: thunderbolt: Fix typo in enum tb_link_width kernel-doc Typo trasmitters -> transmitters. Signed-off-by: Mika Westerberg Signed-off-by: Gil Fine --- include/linux/thunderbolt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 02333f47c994..6151c210d987 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * enum tb_link_width - Thunderbolt/USB4 link width * @TB_LINK_WIDTH_SINGLE: Single lane link * @TB_LINK_WIDTH_DUAL: Dual lane symmetric link - * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters + * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters * @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers */ enum tb_link_width { -- cgit v1.2.3 From 1559d14977b694570f010854b8192e6de034bc27 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Oct 2023 15:02:02 +0200 Subject: gpiolib: provide gpio_device_to_device() There are users in the kernel who need to retrieve the address of the struct device backing the GPIO device. Currently they needlessly poke in the internals of GPIOLIB. Add a dedicated getter function. Signed-off-by: Bartosz Golaszewski Reviewed-by: Peter Rosin Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- include/linux/gpio/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ae4162d3f1d3..0e40098aa283 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -619,6 +619,8 @@ void gpio_device_put(struct gpio_device *gdev); DEFINE_FREE(gpio_device_put, struct gpio_device *, if (IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); +struct device *gpio_device_to_device(struct gpio_device *gdev); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 370232d096e3fe188e4596f77bc6560636bd40c1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Oct 2023 15:02:03 +0200 Subject: gpiolib: provide gpiod_to_gpio_device() Accessing struct gpio_chip backing a GPIO device is only allowed for the actual providers of that chip. Similarly to how we introduced gpio_device_find() in order to replace the abused gpiochip_find(), let's introduce a counterpart to gpiod_to_chip() that returns a reference to the GPIO device owning the descriptor. This is done in order to later remove gpiod_to_chip() entirely. Signed-off-by: Bartosz Golaszewski Reviewed-by: Peter Rosin Acked-by: Linus Walleij --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0e40098aa283..d231c4f31cb4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -785,6 +785,7 @@ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 8c85a102fc4e5c0c942c10677fa43f7a19baa92f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 5 Sep 2023 20:52:55 +0200 Subject: gpiolib: provide gpio_device_get_base() Let's start adding getters for the opaque struct gpio_device. Start with a function allowing to retrieve the base GPIO number. Signed-off-by: Bartosz Golaszewski Acked-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index d231c4f31cb4..1d454dc944b3 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -787,6 +787,9 @@ void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); +/* struct gpio_device getters */ +int gpio_device_get_base(struct gpio_device *gdev); + #else /* CONFIG_GPIOLIB */ #include -- cgit v1.2.3 From 384461abcab6602abc06c2dfb8fb99beeeaa12b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 4 Aug 2023 16:27:06 +0200 Subject: pwm: Manage owner assignment implicitly for drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of requiring each driver to care for assigning the owner member of struct pwm_ops, handle that implicitly using a macro. Note that the owner member has to be moved to struct pwm_chip, as the ops structure usually lives in read-only memory and so cannot be modified. The upside is that new low level drivers cannot forget the assignment and save one line each. The pwm-crc driver didn't assign .owner, that's not a problem in practice though as the driver cannot be compiled as a module. Acked-by: Andy Shevchenko # Intel LPSS Reviewed-by: Florian Fainelli # pwm-{bcm,brcm}*.c Acked-by: Jernej Skrabec # sun4i Acked-by: Andi Shyti Acked-by: Nobuhiro Iwamatsu # pwm-visconti Acked-by: Heiko Stuebner # pwm-rockchip Acked-by: Michael Walle # pwm-sl28cpld Acked-by: Neil Armstrong # pwm-meson Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20230804142707.412137-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..56e3b7a09824 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -267,7 +267,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,13 +277,13 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller + * @owner: module providing this chip * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier @@ -295,6 +294,7 @@ struct pwm_ops { struct pwm_chip { struct device *dev; const struct pwm_ops *ops; + struct module *owner; int base; unsigned int npwm; @@ -386,10 +386,12 @@ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, -- cgit v1.2.3 From a6e5654e0b8b53e2d0e316bc7cecb81dd8371f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 5 Jul 2023 10:06:50 +0200 Subject: pwm: Drop pwm_[sg]et_chip_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The semantic of chip_data is a bit surprising as it's cleared when pwm_put() is called. Also there is a big overlap with the standard driver data. All drivers were adapted to not make use of chip_data any more, so it can go away. Link: https://lore.kernel.org/r/20230705080650.2353391-9-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 56e3b7a09824..e3b437587b32 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -71,7 +71,6 @@ struct pwm_state { * @hwpwm: per-chip relative index of the PWM device * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -82,7 +81,6 @@ struct pwm_device { unsigned int hwpwm; unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -383,8 +381,6 @@ static inline void pwm_disable(struct pwm_device *pwm) /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); int __pwmchip_add(struct pwm_chip *chip, struct module *owner); #define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) @@ -447,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) -{ - return -EINVAL; -} - -static inline void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return NULL; -} - static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; -- cgit v1.2.3 From 38985e8c278b82e6d4d62d4acd57c761cc23ce63 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 9 Oct 2023 13:06:08 +0300 Subject: net: Handle bulk delete policy in bridge driver The merge commit 92716869375b ("Merge branch 'br-flush-filtering'") added support for FDB flushing in bridge driver. The following patches will extend VXLAN driver to support FDB flushing as well. The netlink message for bulk delete is shared between the drivers. With the existing implementation, there is no way to prevent user from flushing with attributes that are not supported per driver. For example, when VNI will be added, user will not get an error for flush FDB entries in bridge with VNI, although this attribute is not relevant for bridge. As preparation for support of FDB flush in VXLAN driver, move the policy to be handled in bridge driver, later a new policy for VXLAN will be added in VXLAN driver. Do not pass 'vid' as part of ndo_fdb_del_bulk(), as this field is relevant only for bridge. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae553f886796..1c7681263d30 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1287,9 +1287,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. - * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1564,10 +1562,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, -- cgit v1.2.3 From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 From: Dan Clash Date: Thu, 12 Oct 2023 14:55:18 -0700 Subject: audit,io_uring: io_uring openat triggers audit reference count underflow An io_uring openat operation can update an audit reference count from multiple threads resulting in the call trace below. A call to io_uring_submit() with a single openat op with a flag of IOSQE_ASYNC results in the following reference count updates. These first part of the system call performs two increments that do not race. do_syscall_64() __do_sys_io_uring_enter() io_submit_sqes() io_openat_prep() __io_openat_prep() getname() getname_flags() /* update 1 (increment) */ __audit_getname() /* update 2 (increment) */ The openat op is queued to an io_uring worker thread which starts the opportunity for a race. The system call exit performs one decrement. do_syscall_64() syscall_exit_to_user_mode() syscall_exit_to_user_mode_prepare() __audit_syscall_exit() audit_reset_context() putname() /* update 3 (decrement) */ The io_uring worker thread performs one increment and two decrements. These updates can race with the system call decrement. io_wqe_worker() io_worker_handle_work() io_wq_submit_work() io_issue_sqe() io_openat() io_openat2() do_filp_open() path_openat() __audit_inode() /* update 4 (increment) */ putname() /* update 5 (decrement) */ __audit_uring_exit() audit_reset_context() putname() /* update 6 (decrement) */ The fix is to change the refcnt member of struct audit_names from int to atomic_t. kernel BUG at fs/namei.c:262! Call Trace: ... ? putname+0x68/0x70 audit_reset_context.part.0.constprop.0+0xe1/0x300 __audit_uring_exit+0xda/0x1c0 io_issue_sqe+0x1f3/0x450 ? lock_timer_base+0x3b/0xd0 io_wq_submit_work+0x8d/0x2b0 ? __try_to_del_timer_sync+0x67/0xa0 io_worker_handle_work+0x17c/0x2b0 io_wqe_worker+0x10a/0x350 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Dan Clash Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..4a40823c3c67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2403,7 +2403,7 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; -- cgit v1.2.3 From 236334aeec0f93217cf9235f2004e61a0a1a5985 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 13 Oct 2023 08:39:16 +0000 Subject: bpf: Avoid unnecessary audit log for CPU security mitigations Check cpu_mitigations_off() first to avoid calling capable() if it is off. This can avoid unnecessary audit log. Fixes: bc5bc309db45 ("bpf: Inherit system settings for CPU security mitigations") Suggested-by: Andrii Nakryiko Signed-off-by: Yafang Shao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/CAEf4Bza6UVUWqcWQ-66weZ-nMDr+TFU3Mtq=dumZFD-pSqU7Ow@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20231013083916.4199-1-laoar.shao@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 61bde4520f5c..f0891ba24cb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable() || cpu_mitigations_off(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable() || cpu_mitigations_off(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); -- cgit v1.2.3 From 84aefafe6b294041b7fa0757414c4a29c1bdeea2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2023 15:14:26 -0700 Subject: clk: linux/clk-provider.h: fix kernel-doc warnings and typos Fix spelling of "Structure". Fix multiple kernel-doc warnings: clk-provider.h:269: warning: Function parameter or member 'recalc_rate' not described in 'clk_ops' clk-provider.h:468: warning: Function parameter or member 'parent_data' not described in 'clk_hw_register_fixed_rate_with_accuracy_parent_data' clk-provider.h:468: warning: Excess function parameter 'parent_name' description in 'clk_hw_register_fixed_rate_with_accuracy_parent_data' clk-provider.h:482: warning: Function parameter or member 'parent_data' not described in 'clk_hw_register_fixed_rate_parent_accuracy' clk-provider.h:482: warning: Excess function parameter 'parent_name' description in 'clk_hw_register_fixed_rate_parent_accuracy' clk-provider.h:687: warning: Function parameter or member 'flags' not described in 'clk_divider' clk-provider.h:1164: warning: Function parameter or member 'flags' not described in 'clk_fractional_divider' clk-provider.h:1164: warning: Function parameter or member 'approximation' not described in 'clk_fractional_divider' clk-provider.h:1213: warning: Function parameter or member 'flags' not described in 'clk_multiplier' Fixes: 9fba738a53dd ("clk: add duty cycle support") Fixes: b2476490ef11 ("clk: introduce the common clock framework") Fixes: 2d34f09e79c9 ("clk: fixed-rate: Add support for specifying parents via DT/pointers") Fixes: f5290d8e4f0c ("clk: asm9260: use parent index to link the reference clock") Fixes: 9d9f78ed9af0 ("clk: basic clock hardware types") Fixes: e2d0e90fae82 ("clk: new basic clk type for fractional divider") Fixes: f2e0a53271a4 ("clk: Add a basic multiplier clock") Signed-off-by: Randy Dunlap Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-clk@vger.kernel.org Link: https://lore.kernel.org/r/20230930221428.18463-1-rdunlap@infradead.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index ec32ec58c59f..ace3a4ce2fc9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core, unsigned long parent_rate); /** - * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * struct clk_duty - Structure encoding the duty cycle ratio of a clock * * @num: Numerator of the duty cycle ratio * @den: Denominator of the duty cycle ratio @@ -129,7 +129,7 @@ struct clk_duty { * @restore_context: Restore the context of the clock after a restoration * of power. * - * @recalc_rate Recalculate the rate of this clock, by querying hardware. The + * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. If the * driver cannot figure out a rate for this clock, it must return @@ -456,7 +456,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock accuracy @@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ @@ -649,7 +649,7 @@ struct clk_div_table { * Clock with an adjustable divider affecting its output frequency. Implements * .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered @@ -1130,11 +1130,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, * @mwidth: width of the numerator bit field * @nshift: shift to the denominator bit field * @nwidth: width of the denominator bit field + * @approximation: clk driver's callback for calculating the divider clock * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. * - * Flags: + * @flags: * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED * is set then the numerator and denominator are both the value read @@ -1191,7 +1192,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw); * Clock with an adjustable multiplier affecting its output frequency. * Implements .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read * from the register, with 0 being a valid value effectively * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is -- cgit v1.2.3 From 787650cc335201a0489905c5504a9179470ebc51 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Oct 2023 20:04:12 -0700 Subject: Input: Annotate struct ff_device with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ff_device. Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20231006201739.work.350-kees@kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 49790c1bd2c4..de6503c0edb8 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -562,7 +562,7 @@ struct ff_device { int max_effects; struct ff_effect *effects; - struct file *effect_owners[]; + struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); -- cgit v1.2.3 From 5b90073defd1a52aa8120403d79f6e0fc10c87ee Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sun, 8 Oct 2023 20:36:17 +0800 Subject: crypto: hisilicon/qm - alloc buffer to set and get xqc If the temporarily applied memory is used to set or get the xqc information, the driver releases the memory immediately after the hardware mailbox operation time exceeds the driver waiting time. However, the hardware does not cancel the operation, so the hardware may write data to released memory. Therefore, when the driver is bound to a device, the driver reserves memory for the xqc configuration. The subsequent xqc configuration uses the reserved memory to prevent hardware from accessing the released memory. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 34c64a02712c..44e0c44a2e20 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -292,6 +292,18 @@ struct qm_err_isolate { struct list_head qm_hw_errs; }; +struct qm_rsv_buf { + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqc *eqc; + struct qm_aeqc *aeqc; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqc_dma; + dma_addr_t aeqc_dma; + struct qm_dma qcdma; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -324,6 +336,7 @@ struct hisi_qm { dma_addr_t cqc_dma; dma_addr_t eqe_dma; dma_addr_t aeqe_dma; + struct qm_rsv_buf xqc_buf; struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; -- cgit v1.2.3 From 886ee55eabac0d46faf8bc0b22207ca2740847ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Oct 2023 10:15:46 +0200 Subject: locking/seqlock: Propagate 'const' pointers within read-only methods, remove forced type casts Currently __seqprop_ptr() is an inline function that must chose to either use 'const' or non-const seqcount related pointers - but this results in the undesirable loss of 'const' propagation, via a forced type cast. The easiest solution would be to turn the pointer wrappers into macros that pass through whatever type is passed to them - but the clever maze of seqlock API instantiation macros relies on the GCC CPP '##' macro extension, which isn't recursive, so inline functions must be used here. So create two wrapper variants instead: 'ptr' and 'const_ptr', and pick the right one for the codepaths that are const: read_seqcount_begin() and read_seqcount_retry(). This cleans up type handling and allows the removal of all type forcing. No change in functionality. Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Paul E. McKenney --- include/linux/seqlock.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4b8dcd3a0d93..80f21d2ca2aa 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,15 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ - return (void *)&s->seqcount; /* drop const */ \ + return &s->seqcount; \ +} \ + \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ } \ \ static __always_inline unsigned \ @@ -247,9 +253,14 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +{ + return s; +} + +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) { - return (void *)s; /* drop const */ + return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -302,6 +313,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) #define seqprop_sequence(s) __seqprop(s, sequence)(s) #define seqprop_preemptible(s) __seqprop(s, preemptible)(s) #define seqprop_assert(s) __seqprop(s, assert)(s) @@ -353,7 +365,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -419,7 +431,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -439,7 +451,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { -- cgit v1.2.3 From 3f7f31fff2510272334f3d0374c432bdaa4f1536 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 12 Oct 2023 12:27:36 -0700 Subject: net/mlx5: Parallelize vhca event handling At present, mlx5 driver have a general purpose event handler which not only handles vhca event but also many other events. This incurs a huge bottleneck because the event handler is implemented by single threaded workqueue and all events are forced to be handled in serial manner even though application tries to create multiple SFs simultaneously. Introduce a dedicated vhca event handler which manages SFs parallel creation. Signed-off-by: Wei Zhang Reviewed-by: Moshe Shemesh Reviewed-by: Shay Drory Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 52e982bc0f50..7968c5ee85c4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -615,6 +615,7 @@ struct mlx5_priv { int adev_idx; int sw_vhca_id; struct mlx5_events *events; + struct mlx5_vhca_events *vhca_events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; -- cgit v1.2.3 From e534552c92a44690e48593f9567fe689545ded73 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 12 Oct 2023 12:27:39 -0700 Subject: net/mlx5: Refactor LAG peer device lookout bus logic to mlx5 devcom LAG peer device lookout bus logic required the usage of global lock, mlx5_intf_mutex. As part of the effort to remove this global lock, refactor LAG peer device lookout to use mlx5 devcom layer. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7968c5ee85c4..f60cdc9bd40f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -624,6 +624,7 @@ struct mlx5_priv { struct mlx5_lag *lag; u32 flags; struct mlx5_devcom_dev *devc; + struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; -- cgit v1.2.3 From 0d2d6bc7e74fee586f587d33484b797bb78f334c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 12 Oct 2023 12:27:41 -0700 Subject: net/mlx5: Remove unused declaration Commit 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") declared mlx5e_ipsec_inverse_table_init() but never implemented it. Commit f52f2faee581 ("net/mlx5e: Introduce flow steering API") declared mlx5e_fs_set_tc() but never implemented it. Commit f2f3df550139 ("net/mlx5: EQ, Privatize eq_table and friends") declared mlx5_eq_comp_cpumask() but never implemented it. Commit cac1eb2cf2e3 ("net/mlx5: Lag, properly lock eswitch if needed") removed mlx5_lag_update() but not its declaration. Commit 35ba005d820b ("net/mlx5: DR, Set flex parser for TNL_MPLS dynamically") removed mlx5dr_ste_build_tnl_mpls() but not its declaration. Commit e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") declared but never implemented mlx5_alloc_cmd_mailbox_chain() and mlx5_free_cmd_mailbox_chain(). Commit 0cf53c124756 ("net/mlx5: FWPage, Use async events chain") removed mlx5_core_req_pages_handler() but not its declaration. Commit 938fe83c8dcb ("net/mlx5_core: New device capabilities handling") removed mlx5_query_odp_caps() but not its declaration. Commit f6a8a19bb11b ("RDMA/netdev: Hoist alloc_netdev_mqs out of the driver") removed mlx5_rdma_netdev_alloc() but not its declaration. Signed-off-by: Yue Haibing Reviewed-by: Leon Romanovsky Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f60cdc9bd40f..d2b8d4a74a30 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1043,10 +1043,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, - gfp_t flags, int npages); -void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, - struct mlx5_cmd_mailbox *head); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); @@ -1060,8 +1056,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1101,8 +1095,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); __be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, - struct mlx5_odp_caps *odp_caps); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1204,12 +1196,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, int vf_id, struct notifier_block *nb); -#ifdef CONFIG_MLX5_CORE_IPOIB -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)); -#endif /* CONFIG_MLX5_CORE_IPOIB */ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); -- cgit v1.2.3 From 57f728d59f005dffdbb52a03531e480a71599bc5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 31 Jul 2023 22:08:17 -0700 Subject: cpumask: kernel-doc cleanups and additions Clean up some punctutation and abbreviations. Add kernel-doc notation for one function and function return value for 39 functions. cpumask.h: Fix some punctuation (plural vs. possessive). Fix some abbreviations (ie. -> i.e., id -> ID). Fix 35 warnings like this: include/linux/cpumask.h:161: warning: No description found for return value of 'cpumask_first' cpumask.c: Add Return: value for 4 functions. Add kernel-doc for cpumask_any_distribute(). Signed-off-by: Randy Dunlap Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/cpumask.h | 113 +++++++++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49db..cfb545841a2c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -4,7 +4,7 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. In general, + * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include @@ -97,7 +97,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And @@ -112,7 +112,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: - * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { @@ -166,7 +166,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if all cpus are set. + * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { @@ -178,7 +178,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * @srcp1: the first input * @srcp2: the second input * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -190,7 +190,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * - * Returns >= nr_cpumask_bits if no CPUs set. + * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { @@ -199,10 +199,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) /** * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set. + * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) @@ -215,10 +215,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /** * cpumask_next_zero - get the next unset cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus unset. + * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { @@ -254,11 +254,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set in both. + * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, @@ -373,7 +373,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) @@ -388,11 +388,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_nth - get the first cpu in a cpumask + * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { @@ -400,12 +400,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s } /** - * cpumask_nth_and - get the first cpu in 2 cpumasks + * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, @@ -416,12 +416,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -436,9 +436,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -497,7 +497,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in @cpumask, else returns false + * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { @@ -509,9 +509,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_set_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { @@ -523,9 +523,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_clear_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { @@ -560,7 +560,7 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, @@ -603,7 +603,7 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, @@ -617,6 +617,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input + * + * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) @@ -630,6 +632,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * @src3p: the third input + * + * Return: true if first cpumask ORed with second cpumask == third cpumask, + * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, @@ -643,6 +648,9 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input + * + * Return: true if first cpumask ANDed with second cpumask is non-empty, + * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) @@ -656,7 +664,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns true if *@src1p is a subset of *@src2p, else returns false + * Return: true if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) @@ -668,6 +676,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p, /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + * + * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { @@ -677,6 +687,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp) /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + * + * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { @@ -686,6 +698,8 @@ static inline bool cpumask_full(const struct cpumask *srcp) /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { @@ -696,6 +710,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -744,7 +760,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) @@ -753,7 +769,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @mask1: the first input cpumask * @mask2: the second input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) @@ -769,7 +785,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) @@ -783,7 +799,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) @@ -797,7 +813,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { @@ -809,7 +825,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { @@ -817,7 +833,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) } /** - * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes + * + * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { @@ -831,7 +849,7 @@ static inline unsigned int cpumask_size(void) * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * - * ie. + * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; @@ -887,6 +905,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * a nop returning a constant 1 (in ). * * See alloc_cpumask_var_node. + * + * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) @@ -1025,7 +1045,7 @@ set_cpu_dying(unsigned int cpu, bool dying) } /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and @@ -1068,6 +1088,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. + * + * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { @@ -1160,7 +1182,7 @@ static inline bool cpu_dying(unsigned int cpu) * @mask: the cpumask to copy * @buf: the buffer to copy into * - * Returns the length of the (null-terminated) @buf string, zero if + * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t @@ -1183,7 +1205,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied, excluding + * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t @@ -1204,6 +1226,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. + * + * Return: the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, -- cgit v1.2.3 From 7733aa893847f021c674d0d30b723d892109369d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Aug 2023 19:20:54 +0300 Subject: bitmap: Remove dead code, i.e. bitmap_copy_le() Besides the fact it's not used anywhere it should be implemented differently, i.e. via helpers from linux/byteorder/generic.h. Yet the helpers themselves need to be introduced first. Also note, the function lacks of the test cases, they must be provided. Hence, drop the current dead code for good. Signed-off-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitmap.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 03644237e1ef..1516ff979315 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -220,11 +220,6 @@ int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); -#else -#define bitmap_copy_le bitmap_copy -#endif int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -- cgit v1.2.3 From aae06fc1b5a2e4b52f8504a1f12f9b8b98e80641 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 7 Oct 2023 16:35:10 -0700 Subject: lib/bitmap: split-out string-related operations to a separate files lib/bitmap.c and corresponding include/linux/bitmap.h are intended to hold functions related to operations on bitmaps, like bitmap_shift or bitmap_set. Historically, some string-related operations like bitmap_parse are also reside in lib/bitmap.c. Now that the subsystem evolves, string-related bitmap operations became a significant part of the file. Because they are quite different from the other bitmap functions by nature, it's worth to split them to a separate source/header files. CC: Andrew Morton CC: Andy Shevchenko CC: Rasmus Villemoes Signed-off-by: Yury Norov --- include/linux/bitmap-str.h | 16 ++++++++++++++++ include/linux/bitmap.h | 18 +----------------- 2 files changed, 17 insertions(+), 17 deletions(-) create mode 100644 include/linux/bitmap-str.h (limited to 'include/linux') diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h new file mode 100644 index 000000000000..17caeca94cab --- /dev/null +++ b/include/linux/bitmap-str.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITMAP_STR_H +#define __LINUX_BITMAP_STR_H + +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); +extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); + +#endif /* __LINUX_BITMAP_STR_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1516ff979315..1cca950a54ae 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device; @@ -200,14 +201,6 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *dst, int nbits); -int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); -int bitmap_parselist(const char *buf, unsigned long *maskp, - int nmaskbits); -int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, @@ -220,15 +213,6 @@ int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -int bitmap_print_to_pagebuf(bool list, char *buf, - const unsigned long *maskp, int nmaskbits); - -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) -- cgit v1.2.3 From 49dbe25adac42d3e06f65d1420946bec65896222 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:14 +0300 Subject: vsock: read from socket's error queue This adds handling of MSG_ERRQUEUE input flag in receive call. This flag is used to read socket's error queue instead of data queue. Possible scenario of error queue usage is receiving completions for transmission with MSG_ZEROCOPY flag. This patch also adds new defines: 'SOL_VSOCK' and 'VSOCK_RECVERR'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From d7fbc0b7e846e9e0e70ae766d274b8720fbab412 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 11 Oct 2023 12:12:34 +0200 Subject: dpll: netlink/core: add support for pin-dpll signal phase offset/adjust Add callback ops for pin-dpll phase measurement. Add callback for pin signal phase adjustment. Add min and max phase adjustment values to pin proprties. Invoke callbacks in dpll_netlink.c when filling the pin details to provide user with phase related attribute values. Signed-off-by: Arkadiusz Kubalewski Signed-off-by: David S. Miller --- include/linux/dpll.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index bbc480cd2932..578fc5fa3750 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -68,6 +68,18 @@ struct dpll_pin_ops { int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -91,6 +103,11 @@ struct dpll_pin_frequency { #define DPLL_PIN_FREQUENCY_DCF77 \ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; @@ -99,6 +116,7 @@ struct dpll_pin_properties { unsigned long capabilities; u32 freq_supported_num; struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; }; #if IS_ENABLED(CONFIG_DPLL) -- cgit v1.2.3 From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Oct 2023 10:01:14 -0400 Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation Syzbot reported two new paths to hit an internal WARNING using the new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 skb len=64521 gso_size=344 and RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 Older virtio types have historically had loose restrictions, leading to many entirely impractical fuzzer generated packets causing problems deep in the kernel stack. Ideally, we would have had strict validation for all types from the start. New virtio types can have tighter validation. Limit UDP GSO packets inserted via virtio to the same limits imposed by the UDP_SEGMENT socket interface: 1. must use checksum offload 2. checksum offload matches UDP header 3. no more segments than UDP_MAX_SEGMENTS 4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..27cc1d464321 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include #include -#include #include static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) -- cgit v1.2.3 From 60c6946675fc06dd2fd2b7a4b6fd1c1f046f1056 Mon Sep 17 00:00:00 2001 From: Xabier Marquiegui Date: Thu, 12 Oct 2023 00:39:53 +0200 Subject: posix-clock: introduce posix_clock_context concept Add the necessary structure to support custom private-data per posix-clock user. The previous implementation of posix-clock assumed all file open instances need access to the same clock structure on private_data. The need for individual data structures per file open instance has been identified when developing support for multiple timestamp event queue users for ptp_clock. Signed-off-by: Xabier Marquiegui Suggested-by: Richard Cochran Suggested-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/linux/posix-clock.h | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -90,6 +91,24 @@ struct posix_clock { bool zombie; }; +/** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + /** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. Caller must provide 'ops' field -- cgit v1.2.3 From 058b4d9de86b3f77cd23fbd43a0f5ee4ea8e0aeb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Oct 2023 11:01:21 +0300 Subject: iommu: change iommu_map_sgtable to return signed values The iommu_map_sgtable() function returns ssize_t and negative error codes but it's declared as size_t instead. I think that static checkers would have complained if this caused a bug, but even though it doesn't cause a bug, it's definitely worth fixing. Signed-off-by: Dan Carpenter Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/06672b96-23fd-424c-8880-1626e7bf119c@moroto.mountain Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 64bd20142cbe..c28178f3690a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1116,7 +1116,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {} * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. */ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, -- cgit v1.2.3 From f6ca3fb6978f94d95ee79f95085fc22e71ca17cc Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Fri, 22 Sep 2023 16:17:16 +0200 Subject: mtd: rawnand: Ensure the nand chip supports cached reads Both the JEDEC and ONFI specification say that read cache sequential support is an optional command. This means that we not only need to check whether the individual controller supports the command, we also need to check the parameter pages for both ONFI and JEDEC NAND flashes before enabling sequential cache reads. This fixes support for NAND flashes which don't support enabling cache reads, i.e. Samsung K9F4G08U0F or Toshiba TC58NVG0S3HTA00. Sequential cache reads are now only available for ONFI and JEDEC devices, if individual vendors implement this, it needs to be enabled per vendor. Tested on i.MX6Q with a Samsung NAND flash chip that doesn't support sequential reads. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230922141717.35977-1-r.czerwinski@pengutronix.de --- include/linux/mtd/jedec.h | 3 +++ include/linux/mtd/onfi.h | 1 + include/linux/mtd/rawnand.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 90a141ba2a5a..c29ace15a053 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); -- cgit v1.2.3 From f447318fb1d156b4b6da79266724c7ee347d1b59 Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Mon, 2 Oct 2023 17:04:58 +0300 Subject: mtd: spinand: add support for FORESEE F35SQA002G Add support for FORESEE F35SQA002G SPI NAND. Datasheet: https://www.longsys.com/uploads/LM-00006FORESEEF35SQA002GDatasheet_1650183701.pdf Signed-off-by: Martin Kurbanov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231002140458.147605-1-mmkurbanov@salutedevices.com --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3e285c09d16d..badb4c1ac079 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -263,6 +263,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; -- cgit v1.2.3 From 0da28d5fc808dfcfbc910870b4b0277c1a7ccb6c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:27 +0200 Subject: drm: renesas: shmobile: Remove backlight support Backlight support should be implemented by panels, not by the LCDC driver. As the feature is currently unused anyway, remove it. Signed-off-by: Laurent Pinchart [geert: Cleanups] Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/69707650245bc2193d072f24723d4d5482ea590b.1694767209.git.geert+renesas@glider.be --- include/linux/platform_data/shmob_drm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..b6b5b6607fb5 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -40,13 +40,6 @@ enum shmob_drm_interface { SHMOB_DRM_IFACE_SYS24, /* 24bpp */ }; -struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ @@ -83,7 +76,6 @@ struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ -- cgit v1.2.3 From 04ed052f3ab4b3c4c3e8451522ffaa84479bf0fb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:30 +0200 Subject: drm: renesas: shmobile: Remove support for SYS panels SYS panels are not used, and have no defined DT bindings. Remove their support to avoid impeding DT support. It can always be added back later. Signed-off-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4ccca2a5ac05c73ea9fd6e44b8bc443fd9d14e0d.1694767209.git.geert+renesas@glider.be --- include/linux/platform_data/shmob_drm.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index b6b5b6607fb5..b728e24222d9 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -27,17 +27,6 @@ enum shmob_drm_interface { SHMOB_DRM_IFACE_RGB18, /* 18bpp */ SHMOB_DRM_IFACE_RGB24, /* 24bpp */ SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ }; struct shmob_drm_panel_data { @@ -46,19 +35,6 @@ struct shmob_drm_panel_data { struct drm_mode_modeinfo mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - #define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ #define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ #define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ @@ -67,7 +43,6 @@ struct shmob_drm_sys_interface_data { struct shmob_drm_interface_data { enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; unsigned int clk_div; unsigned int flags; }; -- cgit v1.2.3 From 6a6ab0c7162b4b10ce74347e282e1bc81103a48f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:38 +0200 Subject: drm: renesas: shmobile: Use struct videomode in platform data Replace the drm_mode_modeinfo field with videomode that includes more signal polarity flags. This simplifies driver handling of panel modes and prepares for DT support. Signed-off-by: Laurent Pinchart [geert: Simplify] Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4312e56de424d94399c6105e7159317eae86c9d5.1694767209.git.geert+renesas@glider.be --- include/linux/platform_data/shmob_drm.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index b728e24222d9..f3cb19ff9f81 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include +#include