From 31c9590ae468478fe47dc0f5f0d3562b2f69450e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 21:06:23 -0400 Subject: SUNRPC: Add "@len" parameter to gss_unwrap() Refactor: This is a pre-requisite to fixing the client-side ralign computation in gss_unwrap_resp_priv(). The length value is passed in explicitly rather that as the value of buf->len. This will subsequently allow gss_unwrap_kerberos_v1() to compute a slack and align value, instead of computing it in gss_unwrap_resp_priv(). Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 2 ++ include/linux/sunrpc/gss_krb5.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 48c1b1674cbf..e9a79518d652 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -66,6 +66,7 @@ u32 gss_wrap( u32 gss_unwrap( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -126,6 +127,7 @@ struct gss_api_ops { u32 (*gss_unwrap)( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index c1d77dd8ed41..e8f8ffe7448b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -83,7 +83,7 @@ struct gss_krb5_enctype { u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, + u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *headskip, u32 *tailskip); /* v2 decryption function */ }; @@ -255,7 +255,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *outbuf, struct page **pages); u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, struct xdr_buf *buf); @@ -312,7 +312,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct page **pages); u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); -- cgit v1.2.3 From a7e429a6fa6d612d1dacde96c885dc1bb4a9f400 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 14:38:19 -0400 Subject: SUNRPC: Fix GSS privacy computation of auth->au_ralign When the au_ralign field was added to gss_unwrap_resp_priv, the wrong calculation was used. Setting au_rslack == au_ralign is probably correct for kerberos_v1 privacy, but kerberos_v2 privacy adds additional GSS data after the clear text RPC message. au_ralign needs to be smaller than au_rslack in that fairly common case. When xdr_buf_trim() is restored to gss_unwrap_kerberos_v2(), it does exactly what I feared it would: it trims off part of the clear text RPC message. However, that's because rpc_prepare_reply_pages() does not set up the rq_rcv_buf's tail correctly because au_ralign is too large. Fixing the au_ralign computation also corrects the alignment of rq_rcv_buf->pages so that the client does not have to shift reply data payloads after they are received. Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e9a79518d652..bc07e51f20d1 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -21,6 +21,7 @@ struct gss_ctx { struct gss_api_mech *mech_type; void *internal_ctx_id; + unsigned int slack, align; }; #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) -- cgit v1.2.3 From 0a8e7b7d08466b5fc52f8e96070acc116d82a8bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Apr 2020 17:36:22 -0400 Subject: SUNRPC: Revert 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") I've noticed that when krb5i or krb5p security is in use, retransmitted requests are missing the server's duplicate reply cache. The computed checksum on the retransmitted request does not match the cached checksum, resulting in the server performing the retransmitted request again instead of returning the cached reply. The assumptions made when removing xdr_buf_trim() were not correct. In the send paths, the upper layer has already set the segment lengths correctly, and shorting the buffer's content is simply a matter of reducing buf->len. xdr_buf_trim() is the right answer in the receive/unwrap path on both the client and the server. The buffer segment lengths have to be shortened one-by-one. On the server side in particular, head.iov_len needs to be updated correctly to enable nfsd_cache_csum() to work correctly. The simple buf->len computation doesn't do that, and that results in checksumming stale data in the buffer. The problem isn't noticed until there's significant instability of the RPC transport. At that point, the reliability of retransmit detection on the server becomes crucial. Fixes: 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 01bb41908c93..22c207b2425f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From 501be6c1c72417eab05e7413671a38ea991a8ebc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 25 Mar 2020 21:16:03 +0100 Subject: drm/tegra: Fix SMMU support on Tegra124 and Tegra210 When testing whether or not to enable the use of the SMMU, consult the supported DMA mask rather than the actually configured DMA mask, since the latter might already have been restricted. Fixes: 2d9384ff9177 ("drm/tegra: Relax IOMMU usage criteria on old Tegra") Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 62d216ff1097..c230b4e70d75 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -17,9 +17,12 @@ enum host1x_class { HOST1X_CLASS_GR3D = 0x60, }; +struct host1x; struct host1x_client; struct iommu_group; +u64 host1x_get_dma_mask(struct host1x *host1x); + /** * struct host1x_client_ops - host1x client operations * @init: host1x client initialization code -- cgit v1.2.3 From 9495b7e92f716ab2bd6814fab5e97ab4a39adfdd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:09:54 +0200 Subject: driver core: platform: Initialize dma_parms for platform devices It's currently the platform driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common platform bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422100954.31211-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bdc35753ef7c..77a2aada106d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,6 +25,7 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; + struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3 From f458488425f1cc9a396aa1d09bb00c48783936da Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:10:13 +0200 Subject: amba: Initialize dma_parms for amba devices It's currently the amba driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common amba bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Russell King Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422101013.31267-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 26f0ecf401ea..0bbfd647f5c6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,6 +65,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3 From 54261af473be4c5481f6196064445d2945f2bdab Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 30 Apr 2020 17:52:40 +0200 Subject: security: Fix the default value of fs_context_parse_param hook security_fs_context_parse_param is called by vfs_parse_fs_param and a succussful return value (i.e 0) implies that a parameter will be consumed by the LSM framework. This stops all further parsing of the parmeter by VFS. Furthermore, if an LSM hook returns a success, the remaining LSM hooks are not invoked for the parameter. The current default behavior of returning success means that all the parameters are expected to be parsed by the LSM hook and none of them end up being populated by vfs in fs_context This was noticed when lsm=bpf is supplied on the command line before any other LSM. As the bpf lsm uses this default value to implement a default hook, this resulted in a failure to parse any fs_context parameters and a failure to mount the root filesystem. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Mikko Ylinen Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..1bdd027766d4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) -LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, +LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) -- cgit v1.2.3 From 115f32512f13c0280161908e9de45a97a87673bb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 1 May 2020 00:35:50 +0530 Subject: bus: mhi: Fix parsing of mhi_flags With the current parsing of mhi_flags, the following statement always return false: eob = !!(flags & MHI_EOB); This is due to the fact that 'enum mhi_flags' starts with index 0 and we are using direct AND operation to extract each bit. Fix this by using BIT() macros for defining the flags so that the reset of the code need not be touched. Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer") Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index ad1996001965..5642806360f3 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -53,9 +53,9 @@ enum mhi_callback { * @MHI_CHAIN: Linked transfer */ enum mhi_flags { - MHI_EOB, - MHI_EOT, - MHI_CHAIN, + MHI_EOB = BIT(0), + MHI_EOT = BIT(1), + MHI_CHAIN = BIT(2), }; /** -- cgit v1.2.3 From 85a087df4a719ebab940efa3c79625e68161f57b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:52 +0530 Subject: bus: mhi: core: Remove link_status() callback If the MHI core detects invalid data due to a PCI read, it calls into the controller via link_status() to double check that the link is infact down. All in all, this is pretty pointless, and racy. There are no good reasons for this, and only drawbacks. Its pointless because chances are, the controller is going to do the same thing to determine if the link is down - attempt a PCI access and compare the result. This does not make the link status decision any smarter. Its racy because its possible that the link was down at the time of the MHI core access, but then recovered before the controller access. In this case, the controller will indicate the link is not down, and the MHI core will precede to use a bad value as the MHI core does not attempt to retry the access. Retrying the access in the MHI core is a bad idea because again, it is racy - what if the link is down again? Furthermore, there may be some higher level state associated with the link status, that is now invalid because the link went down. The only reason why the MHI core could see "invalid" data when doing a PCI access, that is actually valid, is if the register actually contained the PCI spec defined sentinel for an invalid access. In this case, it is arguable that the MHI implementation broken, and should be fixed, not worked around. Therefore, remove the link_status() callback before anyone attempts to implement it. Signed-off-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Reviewed-by: Hemant Kumar Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5642806360f3..c80ba559face 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -335,7 +335,6 @@ struct mhi_controller_config { * @syserr_worker: System error worker * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) - * @link_status: CB function to query link status of the device (required) * @wake_get: CB function to assert device wake (optional) * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) @@ -417,7 +416,6 @@ struct mhi_controller { void (*status_cb)(struct mhi_controller *mhi_cntrl, enum mhi_callback cb); - int (*link_status)(struct mhi_controller *mhi_cntrl); void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_toggle)(struct mhi_controller *mhi_cntrl); -- cgit v1.2.3 From 45723a44845c90c8e859fd0e2b0bb492322b5d0b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:53 +0530 Subject: bus: mhi: core: Offload register accesses to the controller When reading or writing MHI registers, the core assumes that the physical link is a memory mapped PCI link. This assumption may not hold for all MHI devices. The controller knows what is the physical link (ie PCI, I2C, SPI, etc), and therefore knows the proper methods to access that link. The controller can also handle link specific error scenarios, such as reading -1 when the PCI link went down. Therefore, it is appropriate that the MHI core requests the controller to make register accesses on behalf of the core, which abstracts the core from link specifics, and end up removing an unnecessary assumption. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-5-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c80ba559face..84a6c9e72f52 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -342,6 +342,8 @@ struct mhi_controller_config { * @runtimet_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer + * @read_reg: Read a MHI register via the physical link (required) + * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) @@ -425,6 +427,10 @@ struct mhi_controller { struct mhi_buf_info *buf); void (*unmap_single)(struct mhi_controller *mhi_cntrl, struct mhi_buf_info *buf); + int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 *out); + void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 val); size_t buffer_len; bool bounce_buf; -- cgit v1.2.3 From af2e58818082ac0db29539444ca17eb1e77f6000 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:54 +0530 Subject: bus: mhi: core: Fix typo in comment There is a typo - "runtimet" should be "runtime". Fix it. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-6-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 84a6c9e72f52..3d7c3c26eeb9 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -339,7 +339,7 @@ struct mhi_controller_config { * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) * @runtime_get: CB function to controller runtime resume (required) - * @runtimet_put: CB function to decrement pm usage (required) + * @runtime_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer * @read_reg: Read a MHI register via the physical link (required) -- cgit v1.2.3 From 81aabbb9fb7b4b1efd073b62f0505d3adad442f3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 4 May 2020 10:21:44 -0700 Subject: bpf, sockmap: bpf_tcp_ingress needs to subtract bytes from sg.size In bpf_tcp_ingress we used apply_bytes to subtract bytes from sg.size which is used to track total bytes in a message. But this is not correct because apply_bytes is itself modified in the main loop doing the mem_charge. Then at the end of this we have sg.size incorrectly set and out of sync with actual sk values. Then we can get a splat if we try to cork the data later and again try to redirect the msg to ingress. To fix instead of trying to track msg.size do the easy thing and include it as part of the sk_msg_xfer logic so that when the msg is moved the sg.size is always correct. To reproduce the below users will need ingress + cork and hit an error path that will then try to 'free' the skmsg. [ 173.699981] BUG: KASAN: null-ptr-deref in sk_msg_free_elem+0xdd/0x120 [ 173.699987] Read of size 8 at addr 0000000000000008 by task test_sockmap/5317 [ 173.700000] CPU: 2 PID: 5317 Comm: test_sockmap Tainted: G I 5.7.0-rc1+ #43 [ 173.700005] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [ 173.700009] Call Trace: [ 173.700021] dump_stack+0x8e/0xcb [ 173.700029] ? sk_msg_free_elem+0xdd/0x120 [ 173.700034] ? sk_msg_free_elem+0xdd/0x120 [ 173.700042] __kasan_report+0x102/0x15f [ 173.700052] ? sk_msg_free_elem+0xdd/0x120 [ 173.700060] kasan_report+0x32/0x50 [ 173.700070] sk_msg_free_elem+0xdd/0x120 [ 173.700080] __sk_msg_free+0x87/0x150 [ 173.700094] tcp_bpf_send_verdict+0x179/0x4f0 [ 173.700109] tcp_bpf_sendpage+0x3ce/0x5d0 Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/158861290407.14306.5327773422227552482.stgit@john-Precision-5820-Tower --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8a709f63c5e5..ad31c9fb7158 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -187,6 +187,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; dst->sg.size += size; + src->sg.size -= size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } -- cgit v1.2.3 From eb7ae5e06bb6e6ac6bb86872d27c43ebab92f6b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:54 +0200 Subject: bdi: move bdi_dev_name out of line bdi_dev_name is not a fast path function, move it out of line. This prepares for using it from modular callers without having to export an implementation detail like bdi_unknown_name. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f88197c1ffc2..c9ad5c3b7b4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -extern const char *bdi_unknown_name; - -static inline const char *bdi_dev_name(struct backing_dev_info *bdi) -{ - if (!bdi || !bdi->dev) - return bdi_unknown_name; - return dev_name(bdi->dev); -} +const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ -- cgit v1.2.3 From 6bd87eec23cbc9ed222bed0f5b5b02bf300e9a8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:56 +0200 Subject: bdi: add a ->dev_name field to struct backing_dev_info Cache a copy of the name for the life time of the backing_dev_info structure so that we can reference it even after unregistering. Fixes: 68f23b89067f ("memcg: fix a crash in wb_workfn when a device disappears") Reported-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ee577a83cfe6..7367150f962a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -219,6 +219,7 @@ struct backing_dev_info { wait_queue_head_t wb_waitq; struct device *dev; + char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; -- cgit v1.2.3 From 2c864c78c2386ada7433268cdfa8cb77cfe31bf3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 May 2020 14:02:15 -0700 Subject: ptp: fix struct member comment for do_aux_work The do_aux_work callback had documentation in the structure comment which referred to it as "do_work". Signed-off-by: Jacob Keller Cc: Richard Cochran Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 121a7eda4593..c602670bbffb 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -105,10 +105,10 @@ struct ptp_system_timestamp { * parameter func: the desired function to use. * parameter chan: the function channel index to use. * - * @do_work: Request driver to perform auxiliary (periodic) operations - * Driver should return delay of the next auxiliary work scheduling - * time (>=0) or negative value in case further scheduling - * is not required. + * @do_aux_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work + * scheduling time (>=0) or negative value in case further + * scheduling is not required. * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). -- cgit v1.2.3 From 59566b0b622e3e6ea928c0b8cac8a5601b00b383 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 30 Apr 2020 20:21:47 -0400 Subject: x86/ftrace: Have ftrace trampolines turn read-only at the end of system boot up Booting one of my machines, it triggered the following crash: Kernel/User page tables isolation: enabled ftrace: allocating 36577 entries in 143 pages Starting tracer 'function' BUG: unable to handle page fault for address: ffffffffa000005c #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 2014067 P4D 2014067 PUD 2015063 PMD 7b253067 PTE 7b252061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.4.0-test+ #24 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 RIP: 0010:text_poke_early+0x4a/0x58 Code: 34 24 48 89 54 24 08 e8 bf 72 0b 00 48 8b 34 24 48 8b 4c 24 08 84 c0 74 0b 48 89 df f3 a4 48 83 c4 10 5b c3 9c 58 fa 48 89 df a4 50 9d 48 83 c4 10 5b e9 d6 f9 ff ff 0 41 57 49 RSP: 0000:ffffffff82003d38 EFLAGS: 00010046 RAX: 0000000000000046 RBX: ffffffffa000005c RCX: 0000000000000005 RDX: 0000000000000005 RSI: ffffffff825b9a90 RDI: ffffffffa000005c RBP: ffffffffa000005c R08: 0000000000000000 R09: ffffffff8206e6e0 R10: ffff88807b01f4c0 R11: ffffffff8176c106 R12: ffffffff8206e6e0 R13: ffffffff824f2440 R14: 0000000000000000 R15: ffffffff8206eac0 FS: 0000000000000000(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa000005c CR3: 0000000002012000 CR4: 00000000000006b0 Call Trace: text_poke_bp+0x27/0x64 ? mutex_lock+0x36/0x5d arch_ftrace_update_trampoline+0x287/0x2d5 ? ftrace_replace_code+0x14b/0x160 ? ftrace_update_ftrace_func+0x65/0x6c __register_ftrace_function+0x6d/0x81 ftrace_startup+0x23/0xc1 register_ftrace_function+0x20/0x37 func_set_flag+0x59/0x77 __set_tracer_option.isra.19+0x20/0x3e trace_set_options+0xd6/0x13e apply_trace_boot_options+0x44/0x6d register_tracer+0x19e/0x1ac early_trace_init+0x21b/0x2c9 start_kernel+0x241/0x518 ? load_ucode_intel_bsp+0x21/0x52 secondary_startup_64+0xa4/0xb0 I was able to trigger it on other machines, when I added to the kernel command line of both "ftrace=function" and "trace_options=func_stack_trace". The cause is the "ftrace=function" would register the function tracer and create a trampoline, and it will set it as executable and read-only. Then the "trace_options=func_stack_trace" would then update the same trampoline to include the stack tracer version of the function tracer. But since the trampoline already exists, it updates it with text_poke_bp(). The problem is that text_poke_bp() called while system_state == SYSTEM_BOOTING, it will simply do a memcpy() and not the page mapping, as it would think that the text is still read-write. But in this case it is not, and we take a fault and crash. Instead, lets keep the ftrace trampolines read-write during boot up, and then when the kernel executable text is set to read-only, the ftrace trampolines get set to read-only as well. Link: https://lkml.kernel.org/r/20200430202147.4dc6e2de@oasis.local.home Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ab4bd15cbcdb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -210,6 +210,29 @@ struct ftrace_ops { #endif }; +extern struct ftrace_ops __rcu *ftrace_ops_list; +extern struct ftrace_ops ftrace_list_end; + +/* + * Traverse the ftrace_global_list, invoking all entries. The reason that we + * can use rcu_dereference_raw_check() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw_check() calls are needed to handle + * concurrent insertions into the ftrace_global_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ +#define do_for_each_ftrace_op(op, list) \ + op = rcu_dereference_raw_check(list); \ + do + +/* + * Optimized for just a single item in the list (as that is the normal case). + */ +#define while_for_each_ftrace_op(op) \ + while (likely(op = rcu_dereference_raw_check((op)->next)) && \ + unlikely((op) != &ftrace_list_end)) + /* * Type of the current tracing. */ -- cgit v1.2.3 From 04fd61a4e01028210a91f0efc408c8bc61a3018c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 13 May 2020 17:50:34 -0700 Subject: mm, memcg: fix inconsistent oom event behavior A recent commit 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") changed the behavior of memcg events, which will now consider subtrees in memory.events. But oom_kill event is a special one as it is used in both cgroup1 and cgroup2. In cgroup1, it is displayed in memory.oom_control. The file memory.oom_control is in both root memcg and non root memcg, that is different with memory.event as it only in non-root memcg. That commit is okay for cgroup2, but it is not okay for cgroup1 as it will cause inconsistent behavior between root memcg and non-root memcg. Here's an example on why this behavior is inconsistent in cgroup1. root memcg / memcg foo / memcg bar Suppose there's an oom_kill in memcg bar, then the oon_kill will be root memcg : memory.oom_control(oom_kill) 0 / memcg foo : memory.oom_control(oom_kill) 1 / memcg bar : memory.oom_control(oom_kill) 1 For the non-root memcg, its memory.oom_control(oom_kill) includes its descendants' oom_kill, but for root memcg, it doesn't include its descendants' oom_kill. That means, memory.oom_control(oom_kill) has different meanings in different memcgs. That is inconsistent. Then the user has to know whether the memcg is root or not. If we can't fully support it in cgroup1, for example by adding memory.events.local into cgroup1 as well, then let's don't touch its original behavior. Fixes: 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") Reported-by: Randy Dunlap Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Chris Down Acked-by: Michal Hocko Cc: Link: http://lkml.kernel.org/r/20200502141055.7378-1-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d275c72c4f8e..977edd3b7bd8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -783,6 +783,8 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, atomic_long_inc(&memcg->memory_events[event]); cgroup_file_notify(&memcg->events_file); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + break; if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && -- cgit v1.2.3 From 625236ba3832ae947cb3ebb7acc1f30788b274ef Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 12 May 2020 19:46:07 +0200 Subject: security: Fix the default value of secid_to_secctx hook security_secid_to_secctx is called by the bpf_lsm hook and a successful return value (i.e 0) implies that the parameter will be consumed by the LSM framework. The current behaviour return success when the pointer isn't initialized when CONFIG_BPF_LSM is enabled, with the default return from kernel/bpf/bpf_lsm.c. This is the internal error: [ 1229.341488][ T2659] usercopy: Kernel memory exposure attempt detected from null address (offset 0, size 280)! [ 1229.374977][ T2659] ------------[ cut here ]------------ [ 1229.376813][ T2659] kernel BUG at mm/usercopy.c:99! [ 1229.378398][ T2659] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 1229.380348][ T2659] Modules linked in: [ 1229.381654][ T2659] CPU: 0 PID: 2659 Comm: systemd-journal Tainted: G B W 5.7.0-rc5-next-20200511-00019-g864e0c6319b8-dirty #13 [ 1229.385429][ T2659] Hardware name: linux,dummy-virt (DT) [ 1229.387143][ T2659] pstate: 80400005 (Nzcv daif +PAN -UAO BTYPE=--) [ 1229.389165][ T2659] pc : usercopy_abort+0xc8/0xcc [ 1229.390705][ T2659] lr : usercopy_abort+0xc8/0xcc [ 1229.392225][ T2659] sp : ffff000064247450 [ 1229.393533][ T2659] x29: ffff000064247460 x28: 0000000000000000 [ 1229.395449][ T2659] x27: 0000000000000118 x26: 0000000000000000 [ 1229.397384][ T2659] x25: ffffa000127049e0 x24: ffffa000127049e0 [ 1229.399306][ T2659] x23: ffffa000127048e0 x22: ffffa000127048a0 [ 1229.401241][ T2659] x21: ffffa00012704b80 x20: ffffa000127049e0 [ 1229.403163][ T2659] x19: ffffa00012704820 x18: 0000000000000000 [ 1229.405094][ T2659] x17: 0000000000000000 x16: 0000000000000000 [ 1229.407008][ T2659] x15: 0000000000000000 x14: 003d090000000000 [ 1229.408942][ T2659] x13: ffff80000d5b25b2 x12: 1fffe0000d5b25b1 [ 1229.410859][ T2659] x11: 1fffe0000d5b25b1 x10: ffff80000d5b25b1 [ 1229.412791][ T2659] x9 : ffffa0001034bee0 x8 : ffff00006ad92d8f [ 1229.414707][ T2659] x7 : 0000000000000000 x6 : ffffa00015eacb20 [ 1229.416642][ T2659] x5 : ffff0000693c8040 x4 : 0000000000000000 [ 1229.418558][ T2659] x3 : ffffa0001034befc x2 : d57a7483a01c6300 [ 1229.420610][ T2659] x1 : 0000000000000000 x0 : 0000000000000059 [ 1229.422526][ T2659] Call trace: [ 1229.423631][ T2659] usercopy_abort+0xc8/0xcc [ 1229.425091][ T2659] __check_object_size+0xdc/0x7d4 [ 1229.426729][ T2659] put_cmsg+0xa30/0xa90 [ 1229.428132][ T2659] unix_dgram_recvmsg+0x80c/0x930 [ 1229.429731][ T2659] sock_recvmsg+0x9c/0xc0 [ 1229.431123][ T2659] ____sys_recvmsg+0x1cc/0x5f8 [ 1229.432663][ T2659] ___sys_recvmsg+0x100/0x160 [ 1229.434151][ T2659] __sys_recvmsg+0x110/0x1a8 [ 1229.435623][ T2659] __arm64_sys_recvmsg+0x58/0x70 [ 1229.437218][ T2659] el0_svc_common.constprop.1+0x29c/0x340 [ 1229.438994][ T2659] do_el0_svc+0xe8/0x108 [ 1229.440587][ T2659] el0_svc+0x74/0x88 [ 1229.441917][ T2659] el0_sync_handler+0xe4/0x8b4 [ 1229.443464][ T2659] el0_sync+0x17c/0x180 [ 1229.444920][ T2659] Code: aa1703e2 aa1603e1 910a8260 97ecc860 (d4210000) [ 1229.447070][ T2659] ---[ end trace 400497d91baeaf51 ]--- [ 1229.448791][ T2659] Kernel panic - not syncing: Fatal exception [ 1229.450692][ T2659] Kernel Offset: disabled [ 1229.452061][ T2659] CPU features: 0x240002,20002004 [ 1229.453647][ T2659] Memory Limit: none [ 1229.455015][ T2659] ---[ end Kernel panic - not syncing: Fatal exception ]--- Rework the so the default return value is -EOPNOTSUPP. There are likely other callbacks such as security_inode_getsecctx() that may have the same problem, and that someone that understand the code better needs to audit them. Thank you Arnd for helping me figure out what went wrong. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Anders Roxell Signed-off-by: Alexei Starovoitov Acked-by: James Morris Cc: Arnd Bergmann Link: https://lore.kernel.org/bpf/20200512174607.9630-1-anders.roxell@linaro.org --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..21f4fff9e4cd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -243,7 +243,7 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, 0, secid_to_secctx, u32 secid, char **secdata, +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) -- cgit v1.2.3 From cc8a677a76f419016b5e231207d09b073f9b1d3f Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 14 May 2020 08:57:33 +0800 Subject: net: phy: broadcom: fix BCM54XX_SHD_SCR3_TRDDAPD value for BCM54810 Set the correct bit when checking for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54810 PHY. Fixes: 0ececcfc9267 ("net: phy: broadcom: Allow BCM54810 to use bcm54xx_adjust_rxrefclk()") Signed-off-by: Kevin Lo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6462c5447872..f4b77018c625 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -245,6 +245,7 @@ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +#define BCM54810_SHD_SCR3_TRDDAPD 0x0100 /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) -- cgit v1.2.3