diff options
Diffstat (limited to 'include')
186 files changed, 3000 insertions, 2163 deletions
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da49..2fc89704be17 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,7 +66,7 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h index d0343d58a74a..ac29a22eb7cf 100644 --- a/include/asm-generic/simd.h +++ b/include/asm-generic/simd.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_SIMD_H +#define _ASM_GENERIC_SIMD_H -#include <linux/hardirq.h> +#include <linux/compiler_attributes.h> +#include <linux/preempt.h> +#include <linux/types.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -13,3 +17,5 @@ static __must_check inline bool may_use_simd(void) { return !in_interrupt(); } + +#endif /* _ASM_GENERIC_SIMD_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bd..66409bc3a4e0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -108,13 +108,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text #endif #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define DATA_MAIN .data +#define DATA_MAIN .data .data.rel .data.rel.local #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata #define BSS_MAIN .bss diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index c497c73baf13..9eacb9fa375d 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -32,30 +32,28 @@ /* Set this bit for if virtual address destination cannot be used for DMA. */ #define CRYPTO_ACOMP_REQ_DST_NONDMA 0x00000010 -/* Set this bit if source is a folio. */ -#define CRYPTO_ACOMP_REQ_SRC_FOLIO 0x00000020 - -/* Set this bit if destination is a folio. */ -#define CRYPTO_ACOMP_REQ_DST_FOLIO 0x00000040 +/* Private flags that should not be touched by the user. */ +#define CRYPTO_ACOMP_REQ_PRIVATE \ + (CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | \ + CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA) #define CRYPTO_ACOMP_DST_MAX 131072 #define MAX_SYNC_COMP_REQSIZE 0 -#define ACOMP_REQUEST_ALLOC(name, tfm, gfp) \ +#define ACOMP_REQUEST_ON_STACK(name, tfm) \ char __##name##_req[sizeof(struct acomp_req) + \ MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ struct acomp_req *name = acomp_request_on_stack_init( \ - __##name##_req, (tfm), (gfp), false) + __##name##_req, (tfm)) + +#define ACOMP_REQUEST_CLONE(name, gfp) \ + acomp_request_clone(name, sizeof(__##name##_req), gfp) struct acomp_req; struct folio; struct acomp_req_chain { - struct list_head head; - struct acomp_req *req0; - struct acomp_req *cur; - int (*op)(struct acomp_req *req); crypto_completion_t compl; void *data; struct scatterlist ssg; @@ -68,8 +66,6 @@ struct acomp_req_chain { u8 *dst; struct folio *dfolio; }; - size_t soff; - size_t doff; u32 flags; }; @@ -81,10 +77,6 @@ struct acomp_req_chain { * @dst: Destination scatterlist * @svirt: Source virtual address * @dvirt: Destination virtual address - * @sfolio: Source folio - * @soff: Source folio offset - * @dfolio: Destination folio - * @doff: Destination folio offset * @slen: Size of the input buffer * @dlen: Size of the output buffer and number of bytes produced * @chain: Private API code data, do not use @@ -95,15 +87,11 @@ struct acomp_req { union { struct scatterlist *src; const u8 *svirt; - struct folio *sfolio; }; union { struct scatterlist *dst; u8 *dvirt; - struct folio *dfolio; }; - size_t soff; - size_t doff; unsigned int slen; unsigned int dlen; @@ -126,18 +114,11 @@ struct crypto_acomp { int (*compress)(struct acomp_req *req); int (*decompress)(struct acomp_req *req); unsigned int reqsize; - struct crypto_acomp *fb; struct crypto_tfm base; }; -struct crypto_acomp_stream { - spinlock_t lock; - void *ctx; -}; - #define COMP_ALG_COMMON { \ struct crypto_alg base; \ - struct crypto_acomp_stream __percpu *stream; \ } struct comp_alg_common COMP_ALG_COMMON; @@ -213,7 +194,7 @@ static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm) static inline void acomp_request_set_tfm(struct acomp_req *req, struct crypto_acomp *tfm) { - req->base.tfm = crypto_acomp_tfm(tfm); + crypto_request_set_tfm(&req->base, crypto_acomp_tfm(tfm)); } static inline bool acomp_is_async(struct crypto_acomp *tfm) @@ -310,6 +291,11 @@ static inline void *acomp_request_extra(struct acomp_req *req) return (void *)((char *)req + len); } +static inline bool acomp_req_on_stack(struct acomp_req *req) +{ + return crypto_req_on_stack(&req->base); +} + /** * acomp_request_free() -- zeroize and free asynchronous (de)compression * request as well as the output buffer if allocated @@ -319,7 +305,7 @@ static inline void *acomp_request_extra(struct acomp_req *req) */ static inline void acomp_request_free(struct acomp_req *req) { - if (!req || (req->base.flags & CRYPTO_TFM_REQ_ON_STACK)) + if (!req || acomp_req_on_stack(req)) return; kfree_sensitive(req); } @@ -340,17 +326,9 @@ static inline void acomp_request_set_callback(struct acomp_req *req, crypto_completion_t cmpl, void *data) { - u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | - CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA | - CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO | - CRYPTO_TFM_REQ_ON_STACK; - - req->base.complete = cmpl; - req->base.data = data; - req->base.flags &= keep; - req->base.flags |= flgs & ~keep; - - crypto_reqchain_init(&req->base); + flgs &= ~CRYPTO_ACOMP_REQ_PRIVATE; + flgs |= req->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; + crypto_request_set_callback(&req->base, flgs, cmpl, data); } /** @@ -379,8 +357,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->base.flags &= ~(CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | - CRYPTO_ACOMP_REQ_SRC_FOLIO | - CRYPTO_ACOMP_REQ_DST_FOLIO | CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA); } @@ -403,7 +379,6 @@ static inline void acomp_request_set_src_sg(struct acomp_req *req, req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; } /** @@ -423,7 +398,6 @@ static inline void acomp_request_set_src_dma(struct acomp_req *req, req->slen = slen; req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT; } @@ -444,7 +418,6 @@ static inline void acomp_request_set_src_nondma(struct acomp_req *req, req->svirt = src; req->slen = slen; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_NONDMA; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT; } @@ -463,13 +436,9 @@ static inline void acomp_request_set_src_folio(struct acomp_req *req, struct folio *folio, size_t off, unsigned int len) { - req->sfolio = folio; - req->soff = off; - req->slen = len; - - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT; - req->base.flags |= CRYPTO_ACOMP_REQ_SRC_FOLIO; + sg_init_table(&req->chain.ssg, 1); + sg_set_folio(&req->chain.ssg, folio, len, off); + acomp_request_set_src_sg(req, &req->chain.ssg, len); } /** @@ -490,7 +459,6 @@ static inline void acomp_request_set_dst_sg(struct acomp_req *req, req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA; req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; } /** @@ -510,7 +478,6 @@ static inline void acomp_request_set_dst_dma(struct acomp_req *req, req->dlen = dlen; req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT; } @@ -530,7 +497,6 @@ static inline void acomp_request_set_dst_nondma(struct acomp_req *req, req->dvirt = dst; req->dlen = dlen; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_DST_NONDMA; req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT; } @@ -549,19 +515,9 @@ static inline void acomp_request_set_dst_folio(struct acomp_req *req, struct folio *folio, size_t off, unsigned int len) { - req->dfolio = folio; - req->doff = off; - req->dlen = len; - - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT; - req->base.flags |= CRYPTO_ACOMP_REQ_DST_FOLIO; -} - -static inline void acomp_request_chain(struct acomp_req *req, - struct acomp_req *head) -{ - crypto_request_chain(&req->base, &head->base); + sg_init_table(&req->chain.dsg, 1); + sg_set_folio(&req->chain.dsg, folio, len, off); + acomp_request_set_dst_sg(req, &req->chain.dsg, len); } /** @@ -587,18 +543,15 @@ int crypto_acomp_compress(struct acomp_req *req); int crypto_acomp_decompress(struct acomp_req *req); static inline struct acomp_req *acomp_request_on_stack_init( - char *buf, struct crypto_acomp *tfm, gfp_t gfp, bool stackonly) + char *buf, struct crypto_acomp *tfm) { - struct acomp_req *req; - - if (!stackonly && (req = acomp_request_alloc(tfm, gfp))) - return req; - - req = (void *)buf; - acomp_request_set_tfm(req, tfm->fb); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + struct acomp_req *req = (void *)buf; + crypto_stack_request_init(&req->base, crypto_acomp_tfm(tfm)); return req; } +struct acomp_req *acomp_request_clone(struct acomp_req *req, + size_t total, gfp_t gfp); + #endif diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 6e07bbc04089..188eface0a11 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -68,16 +68,17 @@ struct crypto_instance { struct crypto_spawn *spawns; }; - struct work_struct free_work; - void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_template { struct list_head list; struct hlist_head instances; + struct hlist_head dead; struct module *module; + struct work_struct free_work; + int (*create)(struct crypto_template *tmpl, struct rtattr **tb); char name[CRYPTO_MAX_ALG_NAME]; @@ -106,18 +107,6 @@ struct crypto_queue { unsigned int max_qlen; }; -struct scatter_walk { - /* Must be the first member, see struct skcipher_walk. */ - union { - void *const addr; - - /* Private API field, do not touch. */ - union crypto_no_such_thing *__addr; - }; - struct scatterlist *sg; - unsigned int offset; -}; - struct crypto_attr_alg { char name[CRYPTO_MAX_ALG_NAME]; }; @@ -157,8 +146,16 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn); struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb); int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret); const char *crypto_attr_alg_name(struct rtattr *rta); -int crypto_inst_setname(struct crypto_instance *inst, const char *name, - struct crypto_alg *alg); +int __crypto_inst_setname(struct crypto_instance *inst, const char *name, + const char *driver, struct crypto_alg *alg); + +#define crypto_inst_setname(inst, name, ...) \ + CONCATENATE(crypto_inst_setname_, COUNT_ARGS(__VA_ARGS__))( \ + inst, name, ##__VA_ARGS__) +#define crypto_inst_setname_1(inst, name, alg) \ + __crypto_inst_setname(inst, name, name, alg) +#define crypto_inst_setname_2(inst, name, driver, alg) \ + __crypto_inst_setname(inst, name, driver, alg) void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, @@ -266,14 +263,14 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } -static inline bool crypto_request_chained(struct crypto_async_request *req) +static inline bool crypto_tfm_req_virt(struct crypto_tfm *tfm) { - return !list_empty(&req->list); + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_VIRT; } -static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm) +static inline u32 crypto_request_flags(struct crypto_async_request *req) { - return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN; + return req->flags & ~CRYPTO_TFM_REQ_ON_STACK; } #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h index 0c0176285349..dd7694477e50 100644 --- a/include/crypto/blake2b.h +++ b/include/crypto/blake2b.h @@ -7,10 +7,20 @@ #include <linux/types.h> #include <linux/string.h> +struct blake2b_state { + /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ + u64 h[8]; + u64 t[2]; + /* The true state ends here. The rest is temporary storage. */ + u64 f[2]; +}; + enum blake2b_lengths { BLAKE2B_BLOCK_SIZE = 128, BLAKE2B_HASH_SIZE = 64, BLAKE2B_KEY_SIZE = 64, + BLAKE2B_STATE_SIZE = offsetof(struct blake2b_state, f), + BLAKE2B_DESC_SIZE = sizeof(struct blake2b_state), BLAKE2B_160_HASH_SIZE = 20, BLAKE2B_256_HASH_SIZE = 32, @@ -18,16 +28,6 @@ enum blake2b_lengths { BLAKE2B_512_HASH_SIZE = 64, }; -struct blake2b_state { - /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ - u64 h[8]; - u64 t[2]; - u64 f[2]; - u8 buf[BLAKE2B_BLOCK_SIZE]; - unsigned int buflen; - unsigned int outlen; -}; - enum blake2b_iv { BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL, BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL, @@ -40,7 +40,7 @@ enum blake2b_iv { }; static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, - const void *key, size_t keylen) + size_t keylen) { state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen); state->h[1] = BLAKE2B_IV1; @@ -52,15 +52,6 @@ static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, state->h[7] = BLAKE2B_IV7; state->t[0] = 0; state->t[1] = 0; - state->f[0] = 0; - state->f[1] = 0; - state->buflen = 0; - state->outlen = outlen; - if (keylen) { - memcpy(state->buf, key, keylen); - memset(&state->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen); - state->buflen = BLAKE2B_BLOCK_SIZE; - } } #endif /* _CRYPTO_BLAKE2B_H */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index f8cc073bba41..91f6b4cf561c 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -16,6 +16,7 @@ #define _CRYPTO_CHACHA_H #include <linux/unaligned.h> +#include <linux/string.h> #include <linux/types.h> /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ @@ -25,21 +26,32 @@ #define CHACHA_BLOCK_SIZE 64 #define CHACHAPOLY_IV_SIZE 12 -#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) +#define CHACHA_KEY_WORDS 8 +#define CHACHA_STATE_WORDS 16 +#define HCHACHA_OUT_WORDS 8 /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 -void chacha_block_generic(u32 *state, u8 *stream, int nrounds); -static inline void chacha20_block(u32 *state, u8 *stream) +struct chacha_state { + u32 x[CHACHA_STATE_WORDS]; +}; + +void chacha_block_generic(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE], int nrounds); +static inline void chacha20_block(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE]) { - chacha_block_generic(state, stream, 20); + chacha_block_generic(state, out, 20); } -void hchacha_block_arch(const u32 *state, u32 *out, int nrounds); -void hchacha_block_generic(const u32 *state, u32 *out, int nrounds); +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +void hchacha_block_generic(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); -static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) +static inline void hchacha_block(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) hchacha_block_arch(state, out, nrounds); @@ -54,37 +66,40 @@ enum chacha_constants { /* expand 32-byte k */ CHACHA_CONSTANT_TE_K = 0x6b206574U }; -static inline void chacha_init_consts(u32 *state) +static inline void chacha_init_consts(struct chacha_state *state) { - state[0] = CHACHA_CONSTANT_EXPA; - state[1] = CHACHA_CONSTANT_ND_3; - state[2] = CHACHA_CONSTANT_2_BY; - state[3] = CHACHA_CONSTANT_TE_K; + state->x[0] = CHACHA_CONSTANT_EXPA; + state->x[1] = CHACHA_CONSTANT_ND_3; + state->x[2] = CHACHA_CONSTANT_2_BY; + state->x[3] = CHACHA_CONSTANT_TE_K; } -static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv) +static inline void chacha_init(struct chacha_state *state, + const u32 key[CHACHA_KEY_WORDS], + const u8 iv[CHACHA_IV_SIZE]) { chacha_init_consts(state); - state[4] = key[0]; - state[5] = key[1]; - state[6] = key[2]; - state[7] = key[3]; - state[8] = key[4]; - state[9] = key[5]; - state[10] = key[6]; - state[11] = key[7]; - state[12] = get_unaligned_le32(iv + 0); - state[13] = get_unaligned_le32(iv + 4); - state[14] = get_unaligned_le32(iv + 8); - state[15] = get_unaligned_le32(iv + 12); + state->x[4] = key[0]; + state->x[5] = key[1]; + state->x[6] = key[2]; + state->x[7] = key[3]; + state->x[8] = key[4]; + state->x[9] = key[5]; + state->x[10] = key[6]; + state->x[11] = key[7]; + state->x[12] = get_unaligned_le32(iv + 0); + state->x[13] = get_unaligned_le32(iv + 4); + state->x[14] = get_unaligned_le32(iv + 8); + state->x[15] = get_unaligned_le32(iv + 12); } -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds); -void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, +void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds); -static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, +static inline void chacha_crypt(struct chacha_state *state, + u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) @@ -93,10 +108,24 @@ static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, chacha_crypt_generic(state, dst, src, bytes, nrounds); } -static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) +static inline void chacha20_crypt(struct chacha_state *state, + u8 *dst, const u8 *src, unsigned int bytes) { chacha_crypt(state, dst, src, bytes, 20); } +static inline void chacha_zeroize_state(struct chacha_state *state) +{ + memzero_explicit(state, sizeof(*state)); +} + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA) +bool chacha_is_arch_optimized(void); +#else +static inline bool chacha_is_arch_optimized(void) +{ + return false; +} +#endif + #endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h index da1ee73e9ce9..06984a26c8cf 100644 --- a/include/crypto/ctr.h +++ b/include/crypto/ctr.h @@ -8,58 +8,8 @@ #ifndef _CRYPTO_CTR_H #define _CRYPTO_CTR_H -#include <crypto/algapi.h> -#include <crypto/internal/skcipher.h> -#include <linux/string.h> -#include <linux/types.h> - #define CTR_RFC3686_NONCE_SIZE 4 #define CTR_RFC3686_IV_SIZE 8 #define CTR_RFC3686_BLOCK_SIZE 16 -static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req, - void (*fn)(struct crypto_skcipher *, - const u8 *, u8 *)) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - int blocksize = crypto_skcipher_chunksize(tfm); - u8 buf[MAX_CIPHER_BLOCKSIZE]; - struct skcipher_walk walk; - int err; - - /* avoid integer division due to variable blocksize parameter */ - if (WARN_ON_ONCE(!is_power_of_2(blocksize))) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes > 0) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int nbytes = walk.nbytes; - int tail = 0; - - if (nbytes < walk.total) { - tail = walk.nbytes & (blocksize - 1); - nbytes -= tail; - } - - do { - int bsize = min(nbytes, blocksize); - - fn(tfm, walk.iv, buf); - - crypto_xor_cpy(dst, src, buf, bsize); - crypto_inc(walk.iv, blocksize); - - dst += bsize; - src += bsize; - nbytes -= bsize; - } while (nbytes > 0); - - err = skcipher_walk_done(&walk, tail); - } - return err; -} - #endif /* _CRYPTO_CTR_H */ diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h index f832c9f2aca3..043d938e9a2c 100644 --- a/include/crypto/ghash.h +++ b/include/crypto/ghash.h @@ -7,18 +7,18 @@ #define __CRYPTO_GHASH_H__ #include <linux/types.h> -#include <crypto/gf128mul.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 +struct gf128mul_4k; + struct ghash_ctx { struct gf128mul_4k *gf128; }; struct ghash_desc_ctx { u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; }; #endif diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec98..6f6b9de12cd3 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -8,13 +8,17 @@ #ifndef _CRYPTO_HASH_H #define _CRYPTO_HASH_H -#include <linux/atomic.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> #include <linux/string.h> /* Set this bit for virtual address instead of SG list. */ #define CRYPTO_AHASH_REQ_VIRT 0x00000001 +#define CRYPTO_AHASH_REQ_PRIVATE \ + CRYPTO_AHASH_REQ_VIRT + struct crypto_ahash; /** @@ -61,6 +65,10 @@ struct ahash_request { }; u8 *result; + struct scatterlist sg_head[2]; + crypto_completion_t saved_complete; + void *saved_data; + void *__ctx[] CRYPTO_MINALIGN_ATTR; }; @@ -81,6 +89,8 @@ struct ahash_request { * transformation object. Data processing can happen synchronously * [SHASH] or asynchronously [AHASH] at this point. Driver must not use * req->result. + * For block-only algorithms, @update must return the number + * of bytes to store in the API partial block buffer. * @final: **[mandatory]** Retrieve result from the driver. This function finalizes the * transformation and retrieves the resulting hash from the driver and * pushes it back to upper layers. No data processing happens at this @@ -123,6 +133,10 @@ struct ahash_request { * data so the transformation can continue from this point onward. No * data processing happens at this point. Driver must not use * req->result. + * @export_core: Export partial state without partial block. Only defined + * for algorithms that are not block-only. + * @import_core: Import partial state without partial block. Only defined + * for algorithms that are not block-only. * @init_tfm: Initialize the cryptographic transformation object. * This function is called only once at the instantiation * time, right after the transformation context was @@ -135,7 +149,6 @@ struct ahash_request { * This is a counterpart to @init_tfm, used to remove * various changes set in @init_tfm. * @clone_tfm: Copy transform into new object, may allocate memory. - * @reqsize: Size of the request context. * @halg: see struct hash_alg_common */ struct ahash_alg { @@ -146,14 +159,14 @@ struct ahash_alg { int (*digest)(struct ahash_request *req); int (*export)(struct ahash_request *req, void *out); int (*import)(struct ahash_request *req, const void *in); + int (*export_core)(struct ahash_request *req, void *out); + int (*import_core)(struct ahash_request *req, const void *in); int (*setkey)(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); int (*init_tfm)(struct crypto_ahash *tfm); void (*exit_tfm)(struct crypto_ahash *tfm); int (*clone_tfm)(struct crypto_ahash *dst, struct crypto_ahash *src); - unsigned int reqsize; - struct hash_alg_common halg; }; @@ -164,17 +177,31 @@ struct shash_desc { #define HASH_MAX_DIGESTSIZE 64 +/* Worst case is sha3-224. */ +#define HASH_MAX_STATESIZE 200 + 144 + 1 + /* - * Worst case is hmac(sha3-224-generic). Its context is a nested 'shash_desc' - * containing a 'struct sha3_state'. + * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' + * containing a 'struct s390_sha_ctx'. */ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ + HASH_MAX_DESCSIZE) #define SHASH_DESC_ON_STACK(shash, ctx) \ char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc +#define HASH_REQUEST_ON_STACK(name, _tfm) \ + char __##name##_req[sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct ahash_request *name = \ + ahash_request_on_stack_init(__##name##_req, (_tfm)) + +#define HASH_REQUEST_CLONE(name, gfp) \ + hash_request_clone(name, sizeof(__##name##_req), gfp) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg @@ -184,6 +211,8 @@ struct shash_desc { * @digest: see struct ahash_alg * @export: see struct ahash_alg * @import: see struct ahash_alg + * @export_core: see struct ahash_alg + * @import_core: see struct ahash_alg * @setkey: see struct ahash_alg * @init_tfm: Initialize the cryptographic transformation object. * This function is called only once at the instantiation @@ -214,6 +243,8 @@ struct shash_alg { unsigned int len, u8 *out); int (*export)(struct shash_desc *desc, void *out); int (*import)(struct shash_desc *desc, const void *in); + int (*export_core)(struct shash_desc *desc, void *out); + int (*import_core)(struct shash_desc *desc, const void *in); int (*setkey)(struct crypto_shash *tfm, const u8 *key, unsigned int keylen); int (*init_tfm)(struct crypto_shash *tfm); @@ -237,7 +268,6 @@ struct crypto_ahash { }; struct crypto_shash { - unsigned int descsize; struct crypto_tfm base; }; @@ -251,6 +281,11 @@ struct crypto_shash { * CRYPTO_ALG_TYPE_SKCIPHER API applies here as well. */ +static inline bool ahash_req_on_stack(struct ahash_request *req) +{ + return crypto_req_on_stack(&req->base); +} + static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_ahash, base); @@ -458,7 +493,11 @@ int crypto_ahash_finup(struct ahash_request *req); * -EBUSY if queue is full and request should be resubmitted later; * other < 0 if an error occurred */ -int crypto_ahash_final(struct ahash_request *req); +static inline int crypto_ahash_final(struct ahash_request *req) +{ + req->nbytes = 0; + return crypto_ahash_finup(req); +} /** * crypto_ahash_digest() - calculate message digest for a buffer @@ -547,7 +586,7 @@ int crypto_ahash_update(struct ahash_request *req); static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { - req->base.tfm = crypto_ahash_tfm(tfm); + crypto_request_set_tfm(&req->base, crypto_ahash_tfm(tfm)); } /** @@ -583,6 +622,12 @@ static inline struct ahash_request *ahash_request_alloc_noprof( */ void ahash_request_free(struct ahash_request *req); +static inline void ahash_request_zero(struct ahash_request *req) +{ + memzero_explicit(req, sizeof(*req) + + crypto_ahash_reqsize(crypto_ahash_reqtfm(req))); +} + static inline struct ahash_request *ahash_request_cast( struct crypto_async_request *req) { @@ -619,14 +664,9 @@ static inline void ahash_request_set_callback(struct ahash_request *req, crypto_completion_t compl, void *data) { - u32 keep = CRYPTO_AHASH_REQ_VIRT; - - req->base.complete = compl; - req->base.data = data; - flags &= ~keep; - req->base.flags &= keep; - req->base.flags |= flags; - crypto_reqchain_init(&req->base); + flags &= ~CRYPTO_AHASH_REQ_PRIVATE; + flags |= req->base.flags & CRYPTO_AHASH_REQ_PRIVATE; + crypto_request_set_callback(&req->base, flags, compl, data); } /** @@ -675,12 +715,6 @@ static inline void ahash_request_set_virt(struct ahash_request *req, req->base.flags |= CRYPTO_AHASH_REQ_VIRT; } -static inline void ahash_request_chain(struct ahash_request *req, - struct ahash_request *head) -{ - crypto_request_chain(&req->base, &head->base); -} - /** * DOC: Synchronous Message Digest API * @@ -816,7 +850,7 @@ static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags) */ static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm) { - return tfm->descsize; + return crypto_shash_alg(tfm)->descsize; } static inline void *shash_desc_ctx(struct shash_desc *desc) @@ -834,7 +868,7 @@ static inline void *shash_desc_ctx(struct shash_desc *desc) * cipher handle must point to a keyed message digest cipher in order for this * function to succeed. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, @@ -851,7 +885,7 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, * crypto_shash_update and crypto_shash_final. The parameters have the same * meaning as discussed for those separate three functions. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ @@ -871,12 +905,15 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, * directly, and it allocates a hash descriptor on the stack internally. * Note that this stack allocation may be fairly large. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 on success; < 0 if an error occurred. */ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, unsigned int len, u8 *out); +int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data, + unsigned int len, u8 *out); + /** * crypto_shash_export() - extract operational state for message digest * @desc: reference to the operational state handle whose state is exported @@ -886,7 +923,7 @@ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, * caller-allocated output buffer out which must have sufficient size (e.g. by * calling crypto_shash_descsize). * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the export creation was successful; < 0 if an error occurred */ int crypto_shash_export(struct shash_desc *desc, void *out); @@ -900,7 +937,7 @@ int crypto_shash_export(struct shash_desc *desc, void *out); * the input buffer. That buffer should have been generated with the * crypto_ahash_export function. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the import was successful; < 0 if an error occurred */ int crypto_shash_import(struct shash_desc *desc, const void *in); @@ -913,19 +950,29 @@ int crypto_shash_import(struct shash_desc *desc, const void *in); * operational state handle. Any potentially existing state created by * previous operations is discarded. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest initialization was successful; < 0 if an * error occurred */ -static inline int crypto_shash_init(struct shash_desc *desc) -{ - struct crypto_shash *tfm = desc->tfm; +int crypto_shash_init(struct shash_desc *desc); - if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - return -ENOKEY; - - return crypto_shash_alg(tfm)->init(desc); -} +/** + * crypto_shash_finup() - calculate message digest of buffer + * @desc: see crypto_shash_final() + * @data: see crypto_shash_update() + * @len: see crypto_shash_update() + * @out: see crypto_shash_final() + * + * This function is a "short-hand" for the function calls of + * crypto_shash_update and crypto_shash_final. The parameters have the same + * meaning as discussed for those separate functions. + * + * Context: Softirq or process context. + * Return: 0 if the message digest creation was successful; < 0 if an error + * occurred + */ +int crypto_shash_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); /** * crypto_shash_update() - add data to message digest for processing @@ -935,12 +982,15 @@ static inline int crypto_shash_init(struct shash_desc *desc) * * Updates the message digest state of the operational state handle. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest update was successful; < 0 if an error * occurred */ -int crypto_shash_update(struct shash_desc *desc, const u8 *data, - unsigned int len); +static inline int crypto_shash_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return crypto_shash_finup(desc, data, len, NULL); +} /** * crypto_shash_final() - calculate message digest @@ -952,29 +1002,14 @@ int crypto_shash_update(struct shash_desc *desc, const u8 *data, * into the output buffer. The caller must ensure that the output buffer is * large enough by using crypto_shash_digestsize. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ -int crypto_shash_final(struct shash_desc *desc, u8 *out); - -/** - * crypto_shash_finup() - calculate message digest of buffer - * @desc: see crypto_shash_final() - * @data: see crypto_shash_update() - * @len: see crypto_shash_update() - * @out: see crypto_shash_final() - * - * This function is a "short-hand" for the function calls of - * crypto_shash_update and crypto_shash_final. The parameters have the same - * meaning as discussed for those separate functions. - * - * Context: Any context. - * Return: 0 if the message digest creation was successful; < 0 if an error - * occurred - */ -int crypto_shash_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out); +static inline int crypto_shash_final(struct shash_desc *desc, u8 *out) +{ + return crypto_shash_finup(desc, NULL, 0, out); +} static inline void shash_desc_zero(struct shash_desc *desc) { @@ -982,14 +1017,25 @@ static inline void shash_desc_zero(struct shash_desc *desc) sizeof(*desc) + crypto_shash_descsize(desc->tfm)); } -static inline int ahash_request_err(struct ahash_request *req) +static inline bool ahash_is_async(struct crypto_ahash *tfm) { - return req->base.err; + return crypto_tfm_is_async(&tfm->base); } -static inline bool ahash_is_async(struct crypto_ahash *tfm) +static inline struct ahash_request *ahash_request_on_stack_init( + char *buf, struct crypto_ahash *tfm) { - return crypto_tfm_is_async(&tfm->base); + struct ahash_request *req = (void *)buf; + + crypto_stack_request_init(&req->base, crypto_ahash_tfm(tfm)); + return req; +} + +static inline struct ahash_request *ahash_request_clone( + struct ahash_request *req, size_t total, gfp_t gfp) +{ + return container_of(crypto_request_clone(&req->base, total, gfp), + struct ahash_request, base); } #endif /* _CRYPTO_HASH_H */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index aaf59f3236fa..ffffd88bbbad 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -11,12 +11,17 @@ #include <crypto/acompress.h> #include <crypto/algapi.h> +#include <crypto/scatterwalk.h> +#include <linux/compiler_types.h> +#include <linux/cpumask_types.h> +#include <linux/spinlock.h> +#include <linux/workqueue_types.h> -#define ACOMP_REQUEST_ON_STACK(name, tfm) \ +#define ACOMP_FBREQ_ON_STACK(name, req) \ char __##name##_req[sizeof(struct acomp_req) + \ MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ - struct acomp_req *name = acomp_request_on_stack_init( \ - __##name##_req, (tfm), 0, true) + struct acomp_req *name = acomp_fbreq_on_stack_init( \ + __##name##_req, (req)) /** * struct acomp_alg - asynchronous compression algorithm @@ -35,9 +40,7 @@ * counterpart to @init, used to remove various changes set in * @init. * - * @reqsize: Context size for (de)compression requests * @base: Common crypto API algorithm data structure - * @stream: Per-cpu memory for algorithm * @calg: Cmonn algorithm data structure shared with scomp */ struct acomp_alg { @@ -46,14 +49,61 @@ struct acomp_alg { int (*init)(struct crypto_acomp *tfm); void (*exit)(struct crypto_acomp *tfm); - unsigned int reqsize; - union { struct COMP_ALG_COMMON; struct comp_alg_common calg; }; }; +struct crypto_acomp_stream { + spinlock_t lock; + void *ctx; +}; + +struct crypto_acomp_streams { + /* These must come first because of struct scomp_alg. */ + void *(*alloc_ctx)(void); + union { + void (*free_ctx)(void *); + void (*cfree_ctx)(const void *); + }; + + struct crypto_acomp_stream __percpu *streams; + struct work_struct stream_work; + cpumask_t stream_want; +}; + +struct acomp_walk { + union { + /* Virtual address of the source. */ + struct { + struct { + const void *const addr; + } virt; + } src; + + /* Private field for the API, do not use. */ + struct scatter_walk in; + }; + + union { + /* Virtual address of the destination. */ + struct { + struct { + void *const addr; + } virt; + } dst; + + /* Private field for the API, do not use. */ + struct scatter_walk out; + }; + + unsigned int slen; + unsigned int dlen; + + int flags; +}; + /* * Transform internal helpers. */ @@ -98,17 +148,10 @@ void crypto_unregister_acomp(struct acomp_alg *alg); int crypto_register_acomps(struct acomp_alg *algs, int count); void crypto_unregister_acomps(struct acomp_alg *algs, int count); -static inline bool acomp_request_chained(struct acomp_req *req) -{ - return crypto_request_chained(&req->base); -} - static inline bool acomp_request_issg(struct acomp_req *req) { return !(req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT | - CRYPTO_ACOMP_REQ_DST_VIRT | - CRYPTO_ACOMP_REQ_SRC_FOLIO | - CRYPTO_ACOMP_REQ_DST_FOLIO)); + CRYPTO_ACOMP_REQ_DST_VIRT)); } static inline bool acomp_request_src_isvirt(struct acomp_req *req) @@ -143,19 +186,62 @@ static inline bool acomp_request_isnondma(struct acomp_req *req) CRYPTO_ACOMP_REQ_DST_NONDMA); } -static inline bool acomp_request_src_isfolio(struct acomp_req *req) +static inline bool crypto_acomp_req_virt(struct crypto_acomp *tfm) +{ + return crypto_tfm_req_virt(&tfm->base); +} + +void crypto_acomp_free_streams(struct crypto_acomp_streams *s); +int crypto_acomp_alloc_streams(struct crypto_acomp_streams *s); + +struct crypto_acomp_stream *crypto_acomp_lock_stream_bh( + struct crypto_acomp_streams *s) __acquires(stream); + +static inline void crypto_acomp_unlock_stream_bh( + struct crypto_acomp_stream *stream) __releases(stream) +{ + spin_unlock_bh(&stream->lock); +} + +void acomp_walk_done_src(struct acomp_walk *walk, int used); +void acomp_walk_done_dst(struct acomp_walk *walk, int used); +int acomp_walk_next_src(struct acomp_walk *walk); +int acomp_walk_next_dst(struct acomp_walk *walk); +int acomp_walk_virt(struct acomp_walk *__restrict walk, + struct acomp_req *__restrict req, bool atomic); + +static inline bool acomp_walk_more_src(const struct acomp_walk *walk, int cur) +{ + return walk->slen != cur; +} + +static inline u32 acomp_request_flags(struct acomp_req *req) { - return req->base.flags & CRYPTO_ACOMP_REQ_SRC_FOLIO; + return crypto_request_flags(&req->base) & ~CRYPTO_ACOMP_REQ_PRIVATE; } -static inline bool acomp_request_dst_isfolio(struct acomp_req *req) +static inline struct crypto_acomp *crypto_acomp_fb(struct crypto_acomp *tfm) { - return req->base.flags & CRYPTO_ACOMP_REQ_DST_FOLIO; + return __crypto_acomp_tfm(crypto_acomp_tfm(tfm)->fb); } -static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm) +static inline struct acomp_req *acomp_fbreq_on_stack_init( + char *buf, struct acomp_req *old) { - return crypto_tfm_req_chain(&tfm->base); + struct crypto_acomp *tfm = crypto_acomp_reqtfm(old); + struct acomp_req *req = (void *)buf; + + crypto_stack_request_init(&req->base, + crypto_acomp_tfm(crypto_acomp_fb(tfm))); + acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL); + req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE; + req->base.flags |= old->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; + req->src = old->src; + req->dst = old->dst; + req->slen = old->slen; + req->dlen = old->dlen; + + return req; } #endif diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h index 982fe5e8471c..3e09e2485306 100644 --- a/include/crypto/internal/blake2b.h +++ b/include/crypto/internal/blake2b.h @@ -7,65 +7,36 @@ #ifndef _CRYPTO_INTERNAL_BLAKE2B_H #define _CRYPTO_INTERNAL_BLAKE2B_H +#include <asm/byteorder.h> #include <crypto/blake2b.h> #include <crypto/internal/hash.h> +#include <linux/array_size.h> +#include <linux/compiler.h> +#include <linux/build_bug.h> +#include <linux/errno.h> +#include <linux/math.h> #include <linux/string.h> - -void blake2b_compress_generic(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc); +#include <linux/types.h> static inline void blake2b_set_lastblock(struct blake2b_state *state) { state->f[0] = -1; + state->f[1] = 0; } -typedef void (*blake2b_compress_t)(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc); - -static inline void __blake2b_update(struct blake2b_state *state, - const u8 *in, size_t inlen, - blake2b_compress_t compress) +static inline void blake2b_set_nonlast(struct blake2b_state *state) { - const size_t fill = BLAKE2B_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2B_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2B_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2B_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2B_BLOCK_SIZE); - in += BLAKE2B_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2B_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; + state->f[0] = 0; + state->f[1] = 0; } -static inline void __blake2b_final(struct blake2b_state *state, u8 *out, - blake2b_compress_t compress) -{ - int i; - - blake2b_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2B_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); - for (i = 0; i < ARRAY_SIZE(state->h); i++) - __cpu_to_le64s(&state->h[i]); - memcpy(out, state->h, state->outlen); -} +typedef void (*blake2b_compress_t)(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc); /* Helper functions for shash implementations of BLAKE2b */ struct blake2b_tfm_ctx { - u8 key[BLAKE2B_KEY_SIZE]; + u8 key[BLAKE2B_BLOCK_SIZE]; unsigned int keylen; }; @@ -74,10 +45,13 @@ static inline int crypto_blake2b_setkey(struct crypto_shash *tfm, { struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm); - if (keylen == 0 || keylen > BLAKE2B_KEY_SIZE) + if (keylen > BLAKE2B_KEY_SIZE) return -EINVAL; + BUILD_BUG_ON(BLAKE2B_KEY_SIZE > BLAKE2B_BLOCK_SIZE); + memcpy(tctx->key, key, keylen); + memset(tctx->key + keylen, 0, BLAKE2B_BLOCK_SIZE - keylen); tctx->keylen = keylen; return 0; @@ -89,26 +63,38 @@ static inline int crypto_blake2b_init(struct shash_desc *desc) struct blake2b_state *state = shash_desc_ctx(desc); unsigned int outlen = crypto_shash_digestsize(desc->tfm); - __blake2b_init(state, outlen, tctx->key, tctx->keylen); - return 0; + __blake2b_init(state, outlen, tctx->keylen); + return tctx->keylen ? + crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0; } -static inline int crypto_blake2b_update(struct shash_desc *desc, - const u8 *in, unsigned int inlen, - blake2b_compress_t compress) +static inline int crypto_blake2b_update_bo(struct shash_desc *desc, + const u8 *in, unsigned int inlen, + blake2b_compress_t compress) { struct blake2b_state *state = shash_desc_ctx(desc); - __blake2b_update(state, in, inlen, compress); - return 0; + blake2b_set_nonlast(state); + compress(state, in, inlen / BLAKE2B_BLOCK_SIZE, BLAKE2B_BLOCK_SIZE); + return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE); } -static inline int crypto_blake2b_final(struct shash_desc *desc, u8 *out, +static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in, + unsigned int inlen, u8 *out, blake2b_compress_t compress) { struct blake2b_state *state = shash_desc_ctx(desc); + u8 buf[BLAKE2B_BLOCK_SIZE]; + int i; - __blake2b_final(state, out, compress); + memcpy(buf, in, inlen); + memset(buf + inlen, 0, BLAKE2B_BLOCK_SIZE - inlen); + blake2b_set_lastblock(state); + compress(state, buf, 1, inlen); + for (i = 0; i < ARRAY_SIZE(state->h); i++) + __cpu_to_le64s(&state->h[i]); + memcpy(out, state->h, crypto_shash_digestsize(desc->tfm)); + memzero_explicit(buf, sizeof(buf)); return 0; } diff --git a/include/crypto/internal/blockhash.h b/include/crypto/internal/blockhash.h new file mode 100644 index 000000000000..52d9d4c82493 --- /dev/null +++ b/include/crypto/internal/blockhash.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Handle partial blocks for block hash. + * + * Copyright (c) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (c) 2025 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#ifndef _CRYPTO_INTERNAL_BLOCKHASH_H +#define _CRYPTO_INTERNAL_BLOCKHASH_H + +#include <linux/string.h> +#include <linux/types.h> + +#define BLOCK_HASH_UPDATE_BASE(block_fn, state, src, nbytes, bs, dv, \ + buf, buflen) \ + ({ \ + typeof(block_fn) *_block_fn = &(block_fn); \ + typeof(state + 0) _state = (state); \ + unsigned int _buflen = (buflen); \ + size_t _nbytes = (nbytes); \ + unsigned int _bs = (bs); \ + const u8 *_src = (src); \ + u8 *_buf = (buf); \ + while ((_buflen + _nbytes) >= _bs) { \ + const u8 *data = _src; \ + size_t len = _nbytes; \ + size_t blocks; \ + int remain; \ + if (_buflen) { \ + remain = _bs - _buflen; \ + memcpy(_buf + _buflen, _src, remain); \ + data = _buf; \ + len = _bs; \ + } \ + remain = len % bs; \ + blocks = (len - remain) / (dv); \ + (*_block_fn)(_state, data, blocks); \ + _src += len - remain - _buflen; \ + _nbytes -= len - remain - _buflen; \ + _buflen = 0; \ + } \ + memcpy(_buf + _buflen, _src, _nbytes); \ + _buflen += _nbytes; \ + }) + +#define BLOCK_HASH_UPDATE(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, 1, buf, buflen) +#define BLOCK_HASH_UPDATE_BLOCKS(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, bs, buf, buflen) + +#endif /* _CRYPTO_INTERNAL_BLOCKHASH_H */ diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h deleted file mode 100644 index b085dc1ac151..000000000000 --- a/include/crypto/internal/chacha.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _CRYPTO_INTERNAL_CHACHA_H -#define _CRYPTO_INTERNAL_CHACHA_H - -#include <crypto/chacha.h> -#include <crypto/internal/skcipher.h> -#include <linux/crypto.h> - -struct chacha_ctx { - u32 key[8]; - int nrounds; -}; - -static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize, int nrounds) -{ - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - int i; - - if (keysize != CHACHA_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); - - ctx->nrounds = nrounds; - return 0; -} - -static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - return chacha_setkey(tfm, key, keysize, 20); -} - -static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - return chacha_setkey(tfm, key, keysize, 12); -} - -#endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index fbf4be56cf12..b6a4ea2240fc 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -27,10 +27,10 @@ struct device; * @retry_support: indication that the hardware allows re-execution * of a failed backlog request * crypto-engine, in head position to keep order + * @rt: whether this queue is set to run as a realtime task * @list: link with the global crypto engine list * @queue_lock: spinlock to synchronise access to request queue * @queue: the crypto queue of the engine - * @rt: whether this queue is set to run as a realtime task * @prepare_crypt_hardware: a request will soon arrive from the queue * so the subsystem requests the driver to prepare the hardware * by issuing this call @@ -51,14 +51,13 @@ struct crypto_engine { bool running; bool retry_support; + bool rt; struct list_head list; spinlock_t queue_lock; struct crypto_queue queue; struct device *dev; - bool rt; - int (*prepare_crypt_hardware)(struct crypto_engine *engine); int (*unprepare_crypt_hardware)(struct crypto_engine *engine); int (*do_batch_requests)(struct crypto_engine *engine); diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h index 7fd7126f593a..012f5fb22d43 100644 --- a/include/crypto/internal/geniv.h +++ b/include/crypto/internal/geniv.h @@ -15,7 +15,6 @@ struct aead_geniv_ctx { spinlock_t lock; struct crypto_aead *child; - struct crypto_sync_skcipher *sknull; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 485e22cf517e..0f85c543f80b 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -11,6 +11,24 @@ #include <crypto/algapi.h> #include <crypto/hash.h> +/* Set this bit to handle partial blocks in the API. */ +#define CRYPTO_AHASH_ALG_BLOCK_ONLY 0x01000000 + +/* Set this bit if final requires at least one byte. */ +#define CRYPTO_AHASH_ALG_FINAL_NONZERO 0x02000000 + +/* Set this bit if finup can deal with multiple blocks. */ +#define CRYPTO_AHASH_ALG_FINUP_MAX 0x04000000 + +/* This bit is set by the Crypto API if export_core is not supported. */ +#define CRYPTO_AHASH_ALG_NO_EXPORT_CORE 0x08000000 + +#define HASH_FBREQ_ON_STACK(name, req) \ + char __##name##_req[sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct ahash_request *name = ahash_fbreq_on_stack_init( \ + __##name##_req, (req)) + struct ahash_request; struct ahash_instance { @@ -49,6 +67,7 @@ int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); +void ahash_free_singlespawn_instance(struct ahash_instance *inst); int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen); @@ -58,12 +77,20 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) return alg->setkey != shash_no_setkey; } +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } +static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) +{ + return crypto_hash_alg_has_setkey(alg) && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); +} + int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); @@ -187,7 +214,7 @@ static inline void ahash_request_complete(struct ahash_request *req, int err) static inline u32 ahash_request_flags(struct ahash_request *req) { - return req->base.flags; + return crypto_request_flags(&req->base) & ~CRYPTO_AHASH_REQ_PRIVATE; } static inline struct crypto_ahash *crypto_spawn_ahash( @@ -247,20 +274,96 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) return container_of(tfm, struct crypto_shash, base); } -static inline bool ahash_request_chained(struct ahash_request *req) +static inline bool ahash_request_isvirt(struct ahash_request *req) { - return crypto_request_chained(&req->base); + return req->base.flags & CRYPTO_AHASH_REQ_VIRT; } -static inline bool ahash_request_isvirt(struct ahash_request *req) +static inline bool crypto_ahash_req_virt(struct crypto_ahash *tfm) { - return req->base.flags & CRYPTO_AHASH_REQ_VIRT; + return crypto_tfm_req_virt(&tfm->base); } -static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) +static inline struct crypto_ahash *crypto_ahash_fb(struct crypto_ahash *tfm) { - return crypto_tfm_req_chain(&tfm->base); + return __crypto_ahash_cast(crypto_ahash_tfm(tfm)->fb); } +static inline struct ahash_request *ahash_fbreq_on_stack_init( + char *buf, struct ahash_request *old) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(old); + struct ahash_request *req = (void *)buf; + + crypto_stack_request_init(&req->base, + crypto_ahash_tfm(crypto_ahash_fb(tfm))); + ahash_request_set_callback(req, ahash_request_flags(old), NULL, NULL); + req->base.flags &= ~CRYPTO_AHASH_REQ_PRIVATE; + req->base.flags |= old->base.flags & CRYPTO_AHASH_REQ_PRIVATE; + req->src = old->src; + req->result = old->result; + req->nbytes = old->nbytes; + + return req; +} + +/* Return the state size without partial block for block-only algorithms. */ +static inline unsigned int crypto_shash_coresize(struct crypto_shash *tfm) +{ + return crypto_shash_statesize(tfm) - crypto_shash_blocksize(tfm) - 1; +} + +/* This can only be used if the request was never cloned. */ +#define HASH_REQUEST_ZERO(name) \ + memzero_explicit(__##name##_req, sizeof(__##name##_req)) + +/** + * crypto_ahash_export_core() - extract core state for message digest + * @req: reference to the ahash_request handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_ahash_export_core(struct ahash_request *req, void *out); + +/** + * crypto_ahash_import_core() - import core state + * @req: reference to ahash_request handle the state is imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_ahash_import_core(struct ahash_request *req, const void *in); + +/** + * crypto_shash_export_core() - extract core state for message digest + * @desc: reference to the operational state handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_shash_export_core(struct shash_desc *desc, void *out); + +/** + * crypto_shash_import_core() - import core state + * @desc: reference to the operational state handle the state imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_shash_import_core(struct shash_desc *desc, const void *in); + #endif /* _CRYPTO_INTERNAL_HASH_H */ diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index e614594f88c1..c60315f47562 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -6,9 +6,8 @@ #ifndef _CRYPTO_INTERNAL_POLY1305_H #define _CRYPTO_INTERNAL_POLY1305_H -#include <linux/unaligned.h> -#include <linux/types.h> #include <crypto/poly1305.h> +#include <linux/types.h> /* * Poly1305 core functions. These only accept whole blocks; the caller must @@ -31,4 +30,29 @@ void poly1305_core_blocks(struct poly1305_state *state, void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], void *dst); +void poly1305_block_init_arch(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_block_init_generic(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit); + +static inline void poly1305_blocks_generic(struct poly1305_block_state *state, + const u8 *src, unsigned int len, + u32 padbit) +{ + poly1305_core_blocks(&state->h, &state->core_r, src, + len / POLY1305_BLOCK_SIZE, padbit); +} + +void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); + +static inline void poly1305_emit_generic(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + poly1305_core_emit(state, nonce, digest); +} + #endif diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index f25aa2ea3b48..533d6c16a491 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -9,10 +9,7 @@ #ifndef _CRYPTO_SCOMP_INT_H #define _CRYPTO_SCOMP_INT_H -#include <crypto/acompress.h> -#include <crypto/algapi.h> - -struct acomp_req; +#include <crypto/internal/acompress.h> struct crypto_scomp { struct crypto_tfm base; @@ -26,12 +23,10 @@ struct crypto_scomp { * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation * @base: Common crypto API algorithm data structure - * @stream: Per-cpu memory for algorithm + * @streams: Per-cpu memory for algorithm * @calg: Cmonn algorithm data structure shared with acomp */ struct scomp_alg { - void *(*alloc_ctx)(void); - void (*free_ctx)(void *ctx); int (*compress)(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx); @@ -40,6 +35,14 @@ struct scomp_alg { void *ctx); union { + struct { + void *(*alloc_ctx)(void); + void (*free_ctx)(void *ctx); + }; + struct crypto_acomp_streams streams; + }; + + union { struct COMP_ALG_COMMON; struct comp_alg_common calg; }; diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h new file mode 100644 index 000000000000..b9bccd3ff57f --- /dev/null +++ b/include/crypto/internal/sha2.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _CRYPTO_INTERNAL_SHA2_H +#define _CRYPTO_INTERNAL_SHA2_H + +#include <crypto/internal/simd.h> +#include <crypto/sha2.h> +#include <linux/compiler_attributes.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/unaligned.h> + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) +bool sha256_is_arch_optimized(void); +#else +static inline bool sha256_is_arch_optimized(void) +{ + return false; +} +#endif +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static inline void sha256_choose_blocks( + u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, + bool force_generic, bool force_simd) +{ + if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic) + sha256_blocks_generic(state, data, nblocks); + else if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD) && + (force_simd || crypto_simd_usable())) + sha256_blocks_simd(state, data, nblocks); + else + sha256_blocks_arch(state, data, nblocks); +} + +static __always_inline void sha256_finup( + struct crypto_sha256_state *sctx, u8 buf[SHA256_BLOCK_SIZE], + size_t len, u8 out[SHA256_DIGEST_SIZE], size_t digest_size, + bool force_generic, bool force_simd) +{ + const size_t bit_offset = SHA256_BLOCK_SIZE - 8; + __be64 *bits = (__be64 *)&buf[bit_offset]; + int i; + + buf[len++] = 0x80; + if (len > bit_offset) { + memset(&buf[len], 0, SHA256_BLOCK_SIZE - len); + sha256_choose_blocks(sctx->state, buf, 1, force_generic, + force_simd); + len = 0; + } + + memset(&buf[len], 0, bit_offset - len); + *bits = cpu_to_be64(sctx->count << 3); + sha256_choose_blocks(sctx->state, buf, 1, force_generic, force_simd); + + for (i = 0; i < digest_size; i += 4) + put_unaligned_be32(sctx->state[i / 4], out + i); +} + +#endif /* _CRYPTO_INTERNAL_SHA2_H */ diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index be97b97a75dd..7e7f1ac3b7fd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -6,6 +6,7 @@ #ifndef _CRYPTO_INTERNAL_SIMD_H #define _CRYPTO_INTERNAL_SIMD_H +#include <asm/simd.h> #include <linux/percpu.h> #include <linux/types.h> @@ -43,14 +44,9 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. This override is - * currently limited to configurations where the extra self-tests are enabled, - * because it might be a bit too invasive to be part of the regular self-tests. - * - * This is a macro so that <asm/simd.h>, which some architectures don't have, - * doesn't have to be included directly here. + * self-tests, in order to test the no-SIMD fallback code. */ -#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS +#ifdef CONFIG_CRYPTO_SELFTESTS DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a958ab0636ad..d5aa535263f6 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -10,6 +10,7 @@ #include <crypto/algapi.h> #include <crypto/internal/cipher.h> +#include <crypto/scatterwalk.h> #include <crypto/skcipher.h> #include <linux/types.h> @@ -54,48 +55,6 @@ struct crypto_lskcipher_spawn { struct crypto_spawn base; }; -struct skcipher_walk { - union { - /* Virtual address of the source. */ - struct { - struct { - const void *const addr; - } virt; - } src; - - /* Private field for the API, do not use. */ - struct scatter_walk in; - }; - - unsigned int nbytes; - - union { - /* Virtual address of the destination. */ - struct { - struct { - void *const addr; - } virt; - } dst; - - /* Private field for the API, do not use. */ - struct scatter_walk out; - }; - - unsigned int total; - - u8 *page; - u8 *buffer; - u8 *oiv; - void *iv; - - unsigned int ivsize; - - int flags; - unsigned int blocksize; - unsigned int stride; - unsigned int alignmask; -}; - static inline struct crypto_instance *skcipher_crypto_instance( struct skcipher_instance *inst) { @@ -212,7 +171,6 @@ void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count); int lskcipher_register_instance(struct crypto_template *tmpl, struct lskcipher_instance *inst); -int skcipher_walk_done(struct skcipher_walk *walk, int res); int skcipher_walk_virt(struct skcipher_walk *__restrict walk, struct skcipher_request *__restrict req, bool atomic); @@ -223,11 +181,6 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *__restrict walk, struct aead_request *__restrict req, bool atomic); -static inline void skcipher_walk_abort(struct skcipher_walk *walk) -{ - skcipher_walk_done(walk, -ECANCELED); -} - static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) { return crypto_tfm_ctx(&tfm->base); diff --git a/include/crypto/md5.h b/include/crypto/md5.h index cf9e9dec3d21..198b5d69b92f 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -8,6 +8,7 @@ #define MD5_HMAC_BLOCK_SIZE 64 #define MD5_BLOCK_WORDS 16 #define MD5_HASH_WORDS 4 +#define MD5_STATE_SIZE 24 #define MD5_H0 0x67452301UL #define MD5_H1 0xefcdab89UL @@ -18,8 +19,8 @@ extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE]; struct md5_state { u32 hash[MD5_HASH_WORDS]; - u32 block[MD5_BLOCK_WORDS]; u64 byte_count; + u32 block[MD5_BLOCK_WORDS]; }; #endif diff --git a/include/crypto/null.h b/include/crypto/null.h index 0ef577cc00e3..1c66abf9de3b 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -9,7 +9,4 @@ #define NULL_DIGEST_SIZE 0 #define NULL_IV_SIZE 0 -struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void); -void crypto_put_default_null_skcipher(void); - #endif diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 090692ec3bc7..e54abda8cfe9 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -7,7 +7,6 @@ #define _CRYPTO_POLY1305_H #include <linux/types.h> -#include <linux/crypto.h> #define POLY1305_BLOCK_SIZE 16 #define POLY1305_KEY_SIZE 32 @@ -38,17 +37,8 @@ struct poly1305_state { }; }; -struct poly1305_desc_ctx { - /* partial buffer */ - u8 buf[POLY1305_BLOCK_SIZE]; - /* bytes used in partial buffer */ - unsigned int buflen; - /* how many keys have been set in r[] */ - unsigned short rset; - /* whether s[] has been set */ - bool sset; - /* finalize key */ - u32 s[4]; +/* Combined state for block function. */ +struct poly1305_block_state { /* accumulator */ struct poly1305_state h; /* key */ @@ -58,42 +48,29 @@ struct poly1305_desc_ctx { }; }; -void poly1305_init_arch(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); - -static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_init_arch(desc, key); - else - poly1305_init_generic(desc, key); -} - -void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); - -static inline void poly1305_update(struct poly1305_desc_ctx *desc, - const u8 *src, unsigned int nbytes) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_update_arch(desc, src, nbytes); - else - poly1305_update_generic(desc, src, nbytes); -} +struct poly1305_desc_ctx { + /* partial buffer */ + u8 buf[POLY1305_BLOCK_SIZE]; + /* bytes used in partial buffer */ + unsigned int buflen; + /* finalize key */ + u32 s[4]; + struct poly1305_block_state state; +}; -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes); +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); -static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) +bool poly1305_is_arch_optimized(void); +#else +static inline bool poly1305_is_arch_optimized(void) { - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_final_arch(desc, digest); - else - poly1305_final_generic(desc, digest); + return false; } +#endif #endif diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h index 1d630f371f77..d2e63743e592 100644 --- a/include/crypto/polyval.h +++ b/include/crypto/polyval.h @@ -8,15 +8,7 @@ #ifndef _CRYPTO_POLYVAL_H #define _CRYPTO_POLYVAL_H -#include <linux/types.h> -#include <linux/crypto.h> - #define POLYVAL_BLOCK_SIZE 16 #define POLYVAL_DIGEST_SIZE 16 -void polyval_mul_non4k(u8 *op1, const u8 *op2); - -void polyval_update_non4k(const u8 *key, const u8 *in, - size_t nblocks, u8 *accumulator); - #endif diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 5ac4388f50e1..f8224cc390f8 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -102,12 +102,10 @@ static inline struct rng_alg *__crypto_rng_alg(struct crypto_alg *alg) } /** - * crypto_rng_alg - obtain name of RNG - * @tfm: cipher handle - * - * Return the generic name (cra_name) of the initialized random number generator + * crypto_rng_alg() - obtain 'struct rng_alg' pointer from RNG handle + * @tfm: RNG handle * - * Return: generic name string + * Return: Pointer to 'struct rng_alg', derived from @tfm RNG handle */ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) { diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 94a8585f26b2..15ab743f68c8 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -11,11 +11,64 @@ #ifndef _CRYPTO_SCATTERWALK_H #define _CRYPTO_SCATTERWALK_H -#include <crypto/algapi.h> - +#include <linux/errno.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/scatterlist.h> +#include <linux/types.h> + +struct scatter_walk { + /* Must be the first member, see struct skcipher_walk. */ + union { + void *const addr; + + /* Private API field, do not touch. */ + union crypto_no_such_thing *__addr; + }; + struct scatterlist *sg; + unsigned int offset; +}; + +struct skcipher_walk { + union { + /* Virtual address of the source. */ + struct { + struct { + const void *const addr; + } virt; + } src; + + /* Private field for the API, do not use. */ + struct scatter_walk in; + }; + + union { + /* Virtual address of the destination. */ + struct { + struct { + void *const addr; + } virt; + } dst; + + /* Private field for the API, do not use. */ + struct scatter_walk out; + }; + + unsigned int nbytes; + unsigned int total; + + u8 *page; + u8 *buffer; + u8 *oiv; + void *iv; + + unsigned int ivsize; + + int flags; + unsigned int blocksize; + unsigned int stride; + unsigned int alignmask; +}; static inline void scatterwalk_crypto_chain(struct scatterlist *head, struct scatterlist *sg, int num) @@ -243,4 +296,12 @@ struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len); +int skcipher_walk_first(struct skcipher_walk *walk, bool atomic); +int skcipher_walk_done(struct skcipher_walk *walk, int res); + +static inline void skcipher_walk_abort(struct skcipher_walk *walk) +{ + skcipher_walk_done(walk, -ECANCELED); +} + #endif /* _CRYPTO_SCATTERWALK_H */ diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index 044ecea60ac8..f48230b1413c 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -10,6 +10,7 @@ #define SHA1_DIGEST_SIZE 20 #define SHA1_BLOCK_SIZE 64 +#define SHA1_STATE_SIZE offsetof(struct sha1_state, buffer) #define SHA1_H0 0x67452301UL #define SHA1_H1 0xefcdab89UL @@ -25,14 +26,6 @@ struct sha1_state { u8 buffer[SHA1_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - /* * An implementation of SHA-1's compression function. Don't use in new code! * You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h index 0c342ed0d038..62701d136c79 100644 --- a/include/crypto/sha1_base.h +++ b/include/crypto/sha1_base.h @@ -10,10 +10,9 @@ #include <crypto/internal/hash.h> #include <crypto/sha1.h> -#include <linux/crypto.h> -#include <linux/module.h> +#include <linux/math.h> #include <linux/string.h> - +#include <linux/types.h> #include <linux/unaligned.h> typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks); @@ -32,63 +31,38 @@ static inline int sha1_base_init(struct shash_desc *desc) return 0; } -static inline int sha1_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha1_block_fn *block_fn) +static inline int sha1_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, + unsigned int len, + sha1_block_fn *block_fn) { + unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - - sctx->count += len; - - if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx->buffer + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buffer, 1); - } - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SHA1_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buffer + partial, data, len); - - return 0; + sctx->count += len - remain; + block_fn(sctx, data, len / SHA1_BLOCK_SIZE); + return remain; } -static inline int sha1_base_do_finalize(struct shash_desc *desc, - sha1_block_fn *block_fn) +static inline int sha1_base_do_finup(struct shash_desc *desc, + const u8 *src, unsigned int len, + sha1_block_fn *block_fn) { - const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); + unsigned int bit_offset = SHA1_BLOCK_SIZE / 8 - 1; struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - - sctx->buffer[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buffer, 1); - } - - memset(sctx->buffer + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buffer, 1); + union { + __be64 b64[SHA1_BLOCK_SIZE / 4]; + u8 u8[SHA1_BLOCK_SIZE * 2]; + } block = {}; + + if (len >= bit_offset * 8) + bit_offset += SHA1_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); + block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA1_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); return 0; } @@ -102,7 +76,6 @@ static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index b9e9281d76c9..4912572578dc 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -13,12 +13,14 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 +#define SHA256_STATE_WORDS 8 #define SHA384_DIGEST_SIZE 48 #define SHA384_BLOCK_SIZE 128 #define SHA512_DIGEST_SIZE 64 #define SHA512_BLOCK_SIZE 128 +#define SHA512_STATE_SIZE 80 #define SHA224_H0 0xc1059ed8UL #define SHA224_H1 0x367cd507UL @@ -64,9 +66,19 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; -struct sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; +struct crypto_sha256_state { + u32 state[SHA256_STATE_WORDS]; u64 count; +}; + +struct sha256_state { + union { + struct crypto_sha256_state ctx; + struct { + u32 state[SHA256_STATE_WORDS]; + u64 count; + }; + }; u8 buf[SHA256_BLOCK_SIZE]; }; @@ -76,31 +88,7 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - -extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - -/* - * Stand-alone implementation of the SHA256 algorithm. It is designed to - * have as little dependencies as possible so it can be used in the - * kexec_file purgatory. In other cases you should generally use the - * hash APIs from include/crypto/hash.h. Especially when hashing large - * amounts of data as those APIs may be hw-accelerated. - * - * For details see lib/crypto/sha256.c - */ - -static inline void sha256_init(struct sha256_state *sctx) +static inline void sha256_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA256_H0; sctx->state[1] = SHA256_H1; @@ -112,11 +100,16 @@ static inline void sha256_init(struct sha256_state *sctx) sctx->state[7] = SHA256_H7; sctx->count = 0; } -void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len); -void sha256_final(struct sha256_state *sctx, u8 *out); -void sha256(const u8 *data, unsigned int len, u8 *out); -static inline void sha224_init(struct sha256_state *sctx) +static inline void sha256_init(struct sha256_state *sctx) +{ + sha256_block_init(&sctx->ctx); +} +void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); +void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); +void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); + +static inline void sha224_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA224_H0; sctx->state[1] = SHA224_H1; @@ -128,7 +121,12 @@ static inline void sha224_init(struct sha256_state *sctx) sctx->state[7] = SHA224_H7; sctx->count = 0; } + +static inline void sha224_init(struct sha256_state *sctx) +{ + sha224_block_init(&sctx->ctx); +} /* Simply use sha256_update as it is equivalent to sha224_update. */ -void sha224_final(struct sha256_state *sctx, u8 *out); +void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); #endif /* _CRYPTO_SHA2_H */ diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h deleted file mode 100644 index e0418818d63c..000000000000 --- a/include/crypto/sha256_base.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha256_base.h - core logic for SHA-256 implementations - * - * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#ifndef _CRYPTO_SHA256_BASE_H -#define _CRYPTO_SHA256_BASE_H - -#include <asm/byteorder.h> -#include <linux/unaligned.h> -#include <crypto/internal/hash.h> -#include <crypto/sha2.h> -#include <linux/string.h> -#include <linux/types.h> - -typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src, - int blocks); - -static inline int sha224_base_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sha224_init(sctx); - return 0; -} - -static inline int sha256_base_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sha256_init(sctx); - return 0; -} - -static inline int lib_sha256_base_do_update(struct sha256_state *sctx, - const u8 *data, - unsigned int len, - sha256_block_fn *block_fn) -{ - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - - sctx->count += len; - - if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx->buf + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buf, 1); - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SHA256_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buf + partial, data, len); - - return 0; -} - -static inline int sha256_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha256_block_fn *block_fn) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_do_update(sctx, data, len, block_fn); -} - -static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, - sha256_block_fn *block_fn) -{ - const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); - __be64 *bits = (__be64 *)(sctx->buf + bit_offset); - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - - sctx->buf[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buf, 1); - } - - memset(sctx->buf + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buf, 1); - - return 0; -} - -static inline int sha256_base_do_finalize(struct shash_desc *desc, - sha256_block_fn *block_fn) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_do_finalize(sctx, block_fn); -} - -static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, - unsigned int digest_size) -{ - __be32 *digest = (__be32 *)out; - int i; - - for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32)) - put_unaligned_be32(sctx->state[i], digest++); - - memzero_explicit(sctx, sizeof(*sctx)); - return 0; -} - -static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) -{ - unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_finish(sctx, out, digest_size); -} - -#endif /* _CRYPTO_SHA256_BASE_H */ diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h index 080f60c2e6b1..41e1b83a6d91 100644 --- a/include/crypto/sha3.h +++ b/include/crypto/sha3.h @@ -5,30 +5,32 @@ #ifndef __CRYPTO_SHA3_H__ #define __CRYPTO_SHA3_H__ +#include <linux/types.h> + #define SHA3_224_DIGEST_SIZE (224 / 8) #define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE) +#define SHA3_224_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_224_BLOCK_SIZE + 1 #define SHA3_256_DIGEST_SIZE (256 / 8) #define SHA3_256_BLOCK_SIZE (200 - 2 * SHA3_256_DIGEST_SIZE) +#define SHA3_256_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_256_BLOCK_SIZE + 1 #define SHA3_384_DIGEST_SIZE (384 / 8) #define SHA3_384_BLOCK_SIZE (200 - 2 * SHA3_384_DIGEST_SIZE) +#define SHA3_384_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_384_BLOCK_SIZE + 1 #define SHA3_512_DIGEST_SIZE (512 / 8) #define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE) +#define SHA3_512_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_512_BLOCK_SIZE + 1 -struct sha3_state { - u64 st[25]; - unsigned int rsiz; - unsigned int rsizw; +#define SHA3_STATE_SIZE 200 - unsigned int partial; - u8 buf[SHA3_224_BLOCK_SIZE]; +struct shash_desc; + +struct sha3_state { + u64 st[SHA3_STATE_SIZE / 8]; }; int crypto_sha3_init(struct shash_desc *desc); -int crypto_sha3_update(struct shash_desc *desc, const u8 *data, - unsigned int len); -int crypto_sha3_final(struct shash_desc *desc, u8 *out); #endif diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index 679916a84cb2..aa814bab442d 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -10,10 +10,10 @@ #include <crypto/internal/hash.h> #include <crypto/sha2.h> -#include <linux/crypto.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/math.h> #include <linux/string.h> - +#include <linux/types.h> #include <linux/unaligned.h> typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, @@ -53,66 +53,51 @@ static inline int sha512_base_init(struct shash_desc *desc) return 0; } -static inline int sha512_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha512_block_fn *block_fn) +static inline int sha512_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, + unsigned int len, + sha512_block_fn *block_fn) { + unsigned int remain = len - round_down(len, SHA512_BLOCK_SIZE); struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + len -= remain; sctx->count[0] += len; if (sctx->count[0] < len) sctx->count[1]++; - - if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA512_BLOCK_SIZE - partial; - - memcpy(sctx->buf + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buf, 1); - } - - blocks = len / SHA512_BLOCK_SIZE; - len %= SHA512_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SHA512_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buf + partial, data, len); - - return 0; + block_fn(sctx, data, len / SHA512_BLOCK_SIZE); + return remain; } -static inline int sha512_base_do_finalize(struct shash_desc *desc, - sha512_block_fn *block_fn) +static inline int sha512_base_do_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, + sha512_block_fn *block_fn) { - const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); + unsigned int bit_offset = SHA512_BLOCK_SIZE / 8 - 2; struct sha512_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buf + bit_offset); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + union { + __be64 b64[SHA512_BLOCK_SIZE / 4]; + u8 u8[SHA512_BLOCK_SIZE * 2]; + } block = {}; - sctx->buf[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); - partial = 0; + if (len >= SHA512_BLOCK_SIZE) { + int remain; - block_fn(sctx, sctx->buf, 1); + remain = sha512_base_do_update_blocks(desc, src, len, block_fn); + src += len - remain; + len = remain; } - memset(sctx->buf + partial, 0x0, bit_offset - partial); - bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); - bits[1] = cpu_to_be64(sctx->count[0] << 3); - block_fn(sctx, sctx->buf, 1); + if (len >= bit_offset * 8) + bit_offset += SHA512_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count[0] += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count[1] << 3 | + sctx->count[0] >> 61); + block.b64[bit_offset + 1] = cpu_to_be64(sctx->count[0] << 3); + block_fn(sctx, block.u8, (bit_offset + 2) * 8 / SHA512_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); return 0; } @@ -126,9 +111,10 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) put_unaligned_be64(sctx->state[i], digest++); - - memzero_explicit(sctx, sizeof(*sctx)); return 0; } +void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, + int blocks); + #endif /* _CRYPTO_SHA512_BASE_H */ diff --git a/include/crypto/sig.h b/include/crypto/sig.h index 11024708c069..fa6dafafab3f 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -128,7 +128,7 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) /** * crypto_sig_keysize() - Get key size * - * Function returns the key size in bytes. + * Function returns the key size in bits. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 1f021ad0533f..c8d02c86c298 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -14,6 +14,7 @@ #define SM3_DIGEST_SIZE 32 #define SM3_BLOCK_SIZE 64 +#define SM3_STATE_SIZE 40 #define SM3_T1 0x79CC4519 #define SM3_T2 0x7A879D8A @@ -58,7 +59,6 @@ static inline void sm3_init(struct sm3_state *sctx) sctx->count = 0; } -void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); -void sm3_final(struct sm3_state *sctx, u8 *out); +void sm3_block_generic(struct sm3_state *sctx, u8 const *data, int blocks); #endif diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h index b33ed39c2bce..7c53570bc05e 100644 --- a/include/crypto/sm3_base.h +++ b/include/crypto/sm3_base.h @@ -11,87 +11,59 @@ #include <crypto/internal/hash.h> #include <crypto/sm3.h> -#include <linux/crypto.h> +#include <linux/math.h> #include <linux/module.h> #include <linux/string.h> +#include <linux/types.h> #include <linux/unaligned.h> typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks); static inline int sm3_base_init(struct shash_desc *desc) { - struct sm3_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SM3_IVA; - sctx->state[1] = SM3_IVB; - sctx->state[2] = SM3_IVC; - sctx->state[3] = SM3_IVD; - sctx->state[4] = SM3_IVE; - sctx->state[5] = SM3_IVF; - sctx->state[6] = SM3_IVG; - sctx->state[7] = SM3_IVH; - sctx->count = 0; - + sm3_init(shash_desc_ctx(desc)); return 0; } -static inline int sm3_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sm3_block_fn *block_fn) +static inline int sm3_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, unsigned int len, + sm3_block_fn *block_fn) { + unsigned int remain = len - round_down(len, SM3_BLOCK_SIZE); struct sm3_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SM3_BLOCK_SIZE; - - sctx->count += len; - - if (unlikely((partial + len) >= SM3_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SM3_BLOCK_SIZE - partial; - - memcpy(sctx->buffer + partial, data, p); - data += p; - len -= p; - block_fn(sctx, sctx->buffer, 1); - } - - blocks = len / SM3_BLOCK_SIZE; - len %= SM3_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SM3_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buffer + partial, data, len); - - return 0; + sctx->count += len - remain; + block_fn(sctx, data, len / SM3_BLOCK_SIZE); + return remain; } -static inline int sm3_base_do_finalize(struct shash_desc *desc, - sm3_block_fn *block_fn) +static inline int sm3_base_do_finup(struct shash_desc *desc, + const u8 *src, unsigned int len, + sm3_block_fn *block_fn) { - const int bit_offset = SM3_BLOCK_SIZE - sizeof(__be64); + unsigned int bit_offset = SM3_BLOCK_SIZE / 8 - 1; struct sm3_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); - unsigned int partial = sctx->count % SM3_BLOCK_SIZE; + union { + __be64 b64[SM3_BLOCK_SIZE / 4]; + u8 u8[SM3_BLOCK_SIZE * 2]; + } block = {}; - sctx->buffer[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buffer + partial, 0x0, SM3_BLOCK_SIZE - partial); - partial = 0; + if (len >= SM3_BLOCK_SIZE) { + int remain; - block_fn(sctx, sctx->buffer, 1); + remain = sm3_base_do_update_blocks(desc, src, len, block_fn); + src += len - remain; + len = remain; } - memset(sctx->buffer + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buffer, 1); + if (len >= bit_offset * 8) + bit_offset += SM3_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); + block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SM3_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); return 0; } @@ -104,8 +76,6 @@ static inline int sm3_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SM3_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - - memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/crypto/streebog.h b/include/crypto/streebog.h index cae1b4a01971..570f720a113b 100644 --- a/include/crypto/streebog.h +++ b/include/crypto/streebog.h @@ -23,15 +23,10 @@ struct streebog_uint512 { }; struct streebog_state { - union { - u8 buffer[STREEBOG_BLOCK_SIZE]; - struct streebog_uint512 m; - }; struct streebog_uint512 hash; struct streebog_uint512 h; struct streebog_uint512 N; struct streebog_uint512 Sigma; - size_t fillsize; }; #endif /* !_CRYPTO_STREEBOG_H_ */ diff --git a/include/cxl/features.h b/include/cxl/features.h index a3bb34694c06..5f7f842765a5 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -66,7 +66,7 @@ struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) { @@ -78,7 +78,8 @@ static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) return -EOPNOTSUPP; } -static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +static inline int devm_cxl_setup_fwctl(struct device *host, + struct cxl_memdev *cxlmd) { return -EOPNOTSUPP; } diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b..bcd54020d6ba 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje */ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) { - /* The dma-buf's priv field points to the original GEM object. */ - return obj->dma_buf && (obj->dma_buf->priv != obj); + return !!obj->import_attach; } #ifdef CONFIG_LOCKDEP diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index df120b4d1f83..eaf704d3d05e 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops { * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies */ struct drm_gpusvm_devmem { struct device *dev; @@ -97,6 +98,7 @@ struct drm_gpusvm_devmem { const struct drm_gpusvm_devmem_ops *ops; struct drm_pagemap *dpagemap; size_t size; + u64 timeslice_expiration; }; /** @@ -186,6 +188,31 @@ struct drm_gpusvm_notifier { }; /** + * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * + * @migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @unmapped: Flag indicating if the range has been unmapped + * @partial_unmap: Flag indicating if the range has been partially unmapped + * @has_devmem_pages: Flag indicating if the range has devmem pages + * @has_dma_mapping: Flag indicating if the range has a DMA mapping + * @__flags: Flags for range in u16 form (used for READ_ONCE) + */ +struct drm_gpusvm_range_flags { + union { + struct { + /* All flags below must be set upon creation */ + u16 migrate_devmem : 1; + /* All flags below must be set / cleared under notifier lock */ + u16 unmapped : 1; + u16 partial_unmap : 1; + u16 has_devmem_pages : 1; + u16 has_dma_mapping : 1; + }; + u16 __flags; + }; +}; + +/** * struct drm_gpusvm_range - Structure representing a GPU SVM range * * @gpusvm: Pointer to the GPU SVM structure @@ -198,11 +225,6 @@ struct drm_gpusvm_notifier { * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. * Note this is assuming only one drm_pagemap per range is allowed. * @flags: Flags for range - * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @flags.unmapped: Flag indicating if the range has been unmapped - * @flags.partial_unmap: Flag indicating if the range has been partially unmapped - * @flags.has_devmem_pages: Flag indicating if the range has devmem pages - * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. @@ -216,15 +238,7 @@ struct drm_gpusvm_range { unsigned long notifier_seq; struct drm_pagemap_device_addr *dma_addr; struct drm_pagemap *dpagemap; - struct { - /* All flags below must be set upon creation */ - u16 migrate_devmem : 1; - /* All flags below must be set / cleared under notifier lock */ - u16 unmapped : 1; - u16 partial_unmap : 1; - u16 has_devmem_pages : 1; - u16 has_dma_mapping : 1; - } flags; + struct drm_gpusvm_range_flags flags; }; /** @@ -283,17 +297,22 @@ struct drm_gpusvm { * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. + * @timeslice_ms: The timeslice MS which in minimum time a piece of memory + * remains with either exclusive GPU or CPU access. * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory + * @devmem_only: use only device memory * * Context that is DRM GPUSVM is operating in (i.e. user arguments). */ struct drm_gpusvm_ctx { unsigned long check_pages_threshold; + unsigned long timeslice_ms; unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; + unsigned int devmem_only :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 11d59ce0bac0..1c62d1d4458c 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -118,6 +118,9 @@ drm_kunit_helper_create_crtc(struct kunit *test, const struct drm_crtc_funcs *funcs, const struct drm_crtc_helper_funcs *helper_funcs); +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode); + struct drm_display_mode * drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, u8 video_code); diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048..a7ce9523c50d 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) @@ -860,6 +861,10 @@ MACRO__(0xB081, ## __VA_ARGS__), \ MACRO__(0xB082, ## __VA_ARGS__), \ MACRO__(0xB083, ## __VA_ARGS__), \ + MACRO__(0xB084, ## __VA_ARGS__), \ + MACRO__(0xB085, ## __VA_ARGS__), \ + MACRO__(0xB086, ## __VA_ARGS__), \ + MACRO__(0xB087, ## __VA_ARGS__), \ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b8827..c33cba111171 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,14 +9,12 @@ #include <linux/mm_types.h> #include <linux/shmem_fs.h> -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. @@ -45,8 +43,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f..406437ad674b 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index abf0bd76e370..68606fa5fe73 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1013,7 +1013,7 @@ enum hv_register_name { /* * To support arch-generic code calling hv_set/get_register: - * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrq/wrmsrq * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall */ #define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) diff --git a/include/kunit/test.h b/include/kunit/test.h index 0ffb97c78566..39c768f87dc9 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -67,7 +67,7 @@ enum kunit_status { /* * Speed Attribute is stored as an enum and separated into categories of - * speed: very_slowm, slow, and normal. These speeds are relative to + * speed: very_slow, slow, and normal. These speeds are relative to * other KUnit tests. * * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL. diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d..8f7931eb7d16 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41..e721148c95d0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1625c8529e70..65abd5ab8836 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,7 +90,6 @@ struct linux_binfmt { struct list_head lh; struct module *module; int (*load_binary)(struct linux_binprm *); - int (*load_shlib)(struct file *); #ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8..9c37c66ef9ca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include <linux/uio.h> #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; @@ -402,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; -extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); @@ -417,6 +417,30 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); +void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); + +/** + * bio_add_max_vecs - number of bio_vecs needed to add data to a bio + * @kaddr: kernel virtual address to add + * @len: length in bytes to add + * + * Calculate how many bio_vecs need to be allocated to add the kernel virtual + * address range in [@kaddr:@len] in the worse case. + */ +static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) +{ + if (is_vmalloc_addr(kaddr)) + return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE); + return 1; +} + +unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len); +bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len); + +int submit_bio_wait(struct bio *bio); +int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, + size_t len, enum req_op op); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..de8c85a03bb7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,7 @@ #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> +#include <linux/rwsem.h> struct blk_mq_tags; struct blk_flush_queue; @@ -506,6 +507,9 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @update_nr_hwq_lock: + * Synchronize updating nr_hw_queues with add/del disk & + * switching elevator. */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; @@ -527,6 +531,8 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + struct rw_semaphore update_nr_hwq_lock; }; /** @@ -1031,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *, int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); -int blk_rq_map_kern(struct request_queue *, struct request *, void *, - unsigned int, gfp_t); +int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, + gfp_t gfp); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dce7615c35e7..3d1577f07c1c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,6 +220,7 @@ struct bio { unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; enum rw_hint bi_write_hint; + u8 bi_write_stream; blk_status_t bi_status; atomic_t __bi_remaining; @@ -286,7 +287,6 @@ struct bio { enum { BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ - BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ @@ -296,6 +296,14 @@ enum { * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + /* + * This bio has completed bps throttling at the single tg granularity, + * which is different from BIO_BPS_THROTTLED. When the bio is enqueued + * into the sq->queued of the upper tg, or is about to be dispatched, + * this flag needs to be cleared. Since blk-throttle and rq_qos are not + * on the same hierarchical level, reuse the value. + */ + BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED, BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97..332b56f323d9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -182,7 +182,6 @@ struct gendisk { struct list_head slave_bdevs; #endif struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_ZONED @@ -218,6 +217,8 @@ struct gendisk { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + struct mutex rqos_state_mutex; /* rqos state change mutex */ }; /** @@ -331,9 +332,6 @@ typedef unsigned int __bitwise blk_features_t; /* skip this queue in blk_mq_(un)quiesce_tagset */ #define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) -/* bounce all highmem pages */ -#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) - /* undocumented magic for bcache */ #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) @@ -347,7 +345,7 @@ typedef unsigned int __bitwise blk_features_t; */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ @@ -405,6 +403,9 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; + unsigned short max_write_streams; + unsigned int write_stream_granularity; + unsigned int max_open_zones; unsigned int max_active_zones; @@ -644,6 +645,8 @@ enum { QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ + QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_MAX }; @@ -679,6 +682,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) +#define blk_queue_disable_wbt(q) \ + test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) +#define blk_queue_no_elv_switch(q) \ + test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -712,23 +719,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -736,11 +726,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -847,6 +832,51 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. + */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); @@ -1265,6 +1295,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) return queue_max_segments(bdev_get_queue(bdev)); } +static inline unsigned short bdev_max_write_streams(struct block_device *bdev) +{ + if (bdev_is_partition(bdev)) + return 0; + return bdev_limits(bdev)->max_write_streams; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { return q->limits.logical_block_size; @@ -1614,6 +1651,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1670,10 +1708,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); @@ -1685,7 +1719,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1703,8 +1737,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a4ad7839b6..0029ff880e27 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -222,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -397,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a45..50b14a5661c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 485b651869d9..5bc8f55c8cca 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -710,6 +710,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28e999f2c642..166d6de50dbf 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; } static inline void cgroup_get(struct cgroup *cgrp) @@ -785,6 +785,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } @@ -795,19 +806,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, return old_ns; } -#endif /* !CONFIG_CGROUPS */ +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns) - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns && refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} +#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b370..0ee4c21c6dbc 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + void (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c771e9d0d0b9..698520b1bfdb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -120,15 +120,19 @@ struct configfs_attribute { ssize_t (*store)(struct config_item *, const char *, size_t); }; -#define CONFIGFS_ATTR(_pfx, _name) \ +#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ - .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_mode = _perm, \ .ca_owner = THIS_MODULE, \ .show = _pfx##_name##_show, \ .store = _pfx##_name##_store, \ } +#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM( \ + _pfx, _name, S_IRUGO | S_IWUSR \ +) + #define CONFIGFS_ATTR_RO(_pfx, _name) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b..e6089abc28e2 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,10 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_old_microcode(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5..7a5b391dcc01 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 1987400000b4..df366ee15456 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -60,7 +60,6 @@ enum cpuhp_state { /* PREPARE section invoked on a control CPU */ CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, - CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_POWER, diff --git a/include/linux/crc16.h b/include/linux/crc16.h index 9fa74529b317..b861d969b161 100644 --- a/include/linux/crc16.h +++ b/include/linux/crc16.h @@ -15,14 +15,7 @@ #include <linux/types.h> -extern u16 const crc16_table[256]; - -extern u16 crc16(u16 crc, const u8 *buffer, size_t len); - -static inline u16 crc16_byte(u16 crc, const u8 data) -{ - return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -} +u16 crc16(u16 crc, const u8 *p, size_t len); #endif /* __CRC16_H */ diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..569dc13f139f 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -1,7 +1,4 @@ -/* - * crc32.h - * See linux/lib/crc32.c for license and changes - */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 1e3809d28abd..b50f1954d1bb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -14,8 +14,7 @@ #include <linux/completion.h> #include <linux/errno.h> -#include <linux/list.h> -#include <linux/refcount.h> +#include <linux/refcount_types.h> #include <linux/slab.h> #include <linux/types.h> @@ -51,6 +50,15 @@ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 /* + * Set if the algorithm data structure should be duplicated into + * kmalloc memory before registration. This is useful for hardware + * that can be disconnected at will. Do not use this if the data + * structure is embedded into a bigger one. Duplicate the overall + * data structure in the driver in that case. + */ +#define CRYPTO_ALG_DUP_FIRST 0x00000200 + +/* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered * to have passed. @@ -125,8 +133,10 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 -/* Set if the algorithm supports request chains and virtual addresses. */ -#define CRYPTO_ALG_REQ_CHAIN 0x00040000 +/* Set if the algorithm supports virtual addresses. */ +#define CRYPTO_ALG_REQ_VIRT 0x00040000 + +/* The high bits 0xff000000 are reserved for type-specific flags. */ /* * Transform masks and values (for crt_flags). @@ -179,7 +189,6 @@ struct crypto_async_request { struct crypto_tfm *tfm; u32 flags; - int err; }; /** @@ -278,6 +287,7 @@ struct cipher_alg { * to the alignmask of the algorithm being used, in order to * avoid the API having to realign them. Note: the alignmask is * not supported for hash algorithms and is always 0 for them. + * @cra_reqsize: Size of the request context for this algorithm. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest @@ -302,17 +312,8 @@ struct cipher_alg { * by @cra_type and @cra_flags above, the associated structure must be * filled with callbacks. This field might be empty. This is the case * for ahash, shash. - * @cra_init: Initialize the cryptographic transformation object. This function - * is used to initialize the cryptographic transformation object. - * This function is called only once at the instantiation time, right - * after the transformation context was allocated. In case the - * cryptographic hardware has some special requirements which need to - * be handled by software, this function shall check for the precise - * requirement of the transformation and put any software fallbacks - * in place. - * @cra_exit: Deinitialize the cryptographic transformation object. This is a - * counterpart to @cra_init, used to remove various changes set in - * @cra_init. + * @cra_init: Deprecated, do not use. + * @cra_exit: Deprecated, do not use. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE @@ -333,6 +334,7 @@ struct crypto_alg { unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; + unsigned int cra_reqsize; int cra_priority; refcount_t cra_refcnt; @@ -409,9 +411,11 @@ struct crypto_tfm { u32 crt_flags; int node; - + + struct crypto_tfm *fb; + void (*exit)(struct crypto_tfm *tfm); - + struct crypto_alg *__crt_alg; void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; @@ -452,6 +456,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_reqsize; +} + static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) { return tfm->crt_flags; @@ -473,22 +482,44 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -static inline void crypto_reqchain_init(struct crypto_async_request *req) +static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) { - req->err = -EINPROGRESS; - INIT_LIST_HEAD(&req->list); + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; } -static inline void crypto_request_chain(struct crypto_async_request *req, - struct crypto_async_request *head) +static inline bool crypto_req_on_stack(struct crypto_async_request *req) { - req->err = -EINPROGRESS; - list_add_tail(&req->list, &head->list); + return req->flags & CRYPTO_TFM_REQ_ON_STACK; } -static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) +static inline void crypto_request_set_callback( + struct crypto_async_request *req, u32 flags, + crypto_completion_t compl, void *data) { - return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + u32 keep = CRYPTO_TFM_REQ_ON_STACK; + + req->complete = compl; + req->data = data; + req->flags &= keep; + req->flags |= flags & ~keep; +} + +static inline void crypto_request_set_tfm(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->tfm = tfm; + req->flags &= ~CRYPTO_TFM_REQ_ON_STACK; +} + +struct crypto_async_request *crypto_request_clone( + struct crypto_async_request *req, size_t total, gfp_t gfp); + +static inline void crypto_stack_request_init(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->flags = 0; + crypto_request_set_tfm(req, tfm); + req->flags |= CRYPTO_TFM_REQ_ON_STACK; } #endif /* _LINUX_CRYPTO_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bf..e29823c701ac 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,7 +57,8 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n)) +#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) +#define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; @@ -173,65 +174,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(14) - -#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(18) - -#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(23) - -#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(25) -#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* @@ -287,7 +282,6 @@ extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index d02f32b7514e..0864773a57e8 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; + if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))) + return 0; + if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; - else if (S_ISCHR(inode->i_mode)) + else /* S_ISCHR by the test above */ type = DEVCG_DEV_CHAR; - else - return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..85ab710ec0e7 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index f632ecfb4238..06c4de602b2f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,6 +11,7 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include <linux/nodemask_types.h> #include <linux/scatterlist.h> #include <asm/io.h> @@ -18,8 +19,8 @@ struct device; #ifdef CONFIG_HAS_DMA -struct dma_pool *dma_pool_create(const char *name, struct device *dev, - size_t size, size_t align, size_t allocation); +struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, + size_t size, size_t align, size_t boundary, int node); void dma_pool_destroy(struct dma_pool *pool); @@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev, void dmam_pool_destroy(struct dma_pool *pool); #else /* !CONFIG_HAS_DMA */ -static inline struct dma_pool *dma_pool_create(const char *name, - struct device *dev, size_t size, size_t align, size_t allocation) -{ return NULL; } +static inline struct dma_pool *dma_pool_create_node(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary, + int node) +{ + return NULL; +} static inline void dma_pool_destroy(struct dma_pool *pool) { } static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return NULL; } @@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name, static inline void dmam_pool_destroy(struct dma_pool *pool) { } #endif /* !CONFIG_HAS_DMA */ +static inline struct dma_pool *dma_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary) +{ + return dma_pool_create_node(name, dev, size, align, boundary, + NUMA_NO_NODE); +} + static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..f94f3fdf15fc 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include <linux/kmsan.h> #include <asm/entry-common.h> +#include <asm/syscall.h> /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -367,6 +368,15 @@ static __always_inline void exit_to_user_mode(void) } /** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + +/** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs * @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 65655a5d1be2..ca42d5e46ccc 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/moduleloader.h> +#include <linux/cleanup.h> #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -53,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -93,9 +94,15 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); + +/* + * Called from mark_readonly(), where the system transitions to ROX. + */ +void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +static inline void execmem_cache_make_ro(void) { } #endif /** @@ -170,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/file.h b/include/linux/file.h index 302f11355b10..af1768d934a0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f) static inline void fdput(struct fd fd) { - if (fd.word & FDPUT_FPUT) + if (unlikely(fd.word & FDPUT_FPUT)) fput(fd_file(fd)); } diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 7db62fbc0500..31551e4cb8f3 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,7 +61,6 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -178,20 +177,14 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) */ static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) { - long old, new; + long old; old = atomic_long_read(&ref->refcnt); - do { - if (unlikely(old < 0)) - return __file_ref_put_badval(ref, old); - - if (old == FILE_REF_ONEREF) - new = FILE_REF_DEAD; - else - new = old - 1; - } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); - - return new == FILE_REF_DEAD; + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); } /** diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index 4f87a908ab4f..ecd821ed8064 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -104,7 +104,6 @@ unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_w unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, unsigned int alg_id, int mem_type); -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv); unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..0db87f8e676c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -408,6 +408,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the @@ -433,7 +434,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) } struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ @@ -867,6 +867,11 @@ static inline void inode_lock(struct inode *inode) down_write(&inode->i_rwsem); } +static inline __must_check int inode_lock_killable(struct inode *inode) +{ + return down_write_killable(&inode->i_rwsem); +} + static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); @@ -877,6 +882,11 @@ static inline void inode_lock_shared(struct inode *inode) down_read(&inode->i_rwsem); } +static inline __must_check int inode_lock_shared_killable(struct inode *inode) +{ + return down_read_killable(&inode->i_rwsem); +} + static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); @@ -1307,6 +1317,7 @@ struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ + const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1780,7 +1791,7 @@ static inline void __sb_end_write(struct super_block *sb, int level) static inline void __sb_start_write(struct super_block *sb, int level) { - percpu_down_read(sb->s_writers.rw_sem + level - 1); + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) @@ -2071,8 +2082,18 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@ -2186,7 +2207,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -2269,6 +2290,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. @@ -2282,6 +2304,7 @@ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), + FREEZE_EXCL = (1U << 3), }; struct super_operations { @@ -2295,9 +2318,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *, enum freeze_holder who); + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *, enum freeze_holder who); + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -2344,6 +2367,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2400,6 +2424,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) @@ -2705,8 +2730,10 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -int freeze_super(struct super_block *super, enum freeze_holder who); -int thaw_super(struct super_block *super, enum freeze_holder who); +int freeze_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); +int thaw_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -3475,7 +3502,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); @@ -3515,9 +3543,11 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -extern void iterate_supers(void (*)(struct super_block *, void *), void *); +extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); +void filesystems_freeze(void); +void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 53e566efd5fd..5a0e897cae80 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -87,14 +87,9 @@ extern int lookup_constant(const struct constant_table tbl[], const char *name, extern const struct constant_table bool_names[]; #ifdef CONFIG_VALIDATE_FS_PARSER -extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special); extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else -static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special) -{ return true; } static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } @@ -125,8 +120,6 @@ static inline bool fs_validate_description(const char *name, #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) -#define fsparam_u32hex(NAME, OPT) \ - __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8b..fc27b53c58c2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -907,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..005b040c4791 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,11 @@ #include <linux/sched.h> #include <linux/ktime.h> +#include <linux/mm_types.h> #include <uapi/linux/futex.h> struct inode; -struct mm_struct; struct task_struct; /* @@ -34,6 +34,7 @@ union futex_key { u64 i_seq; unsigned long pgoff; unsigned int offset; + /* unsigned int node; */ } shared; struct { union { @@ -42,11 +43,13 @@ union futex_key { }; unsigned long address; unsigned int offset; + /* unsigned int node; */ } private; struct { u64 ptr; unsigned long word; unsigned int offset; + unsigned int node; /* NOT hashed! */ } both; }; @@ -77,7 +80,25 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#else +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +int futex_hash_allocate_default(void); +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + RCU_INIT_POINTER(mm->futex_phash, NULL); + mutex_init(&mm->futex_hash_lock); +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline int futex_hash_allocate_default(void) { return 0; } +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -88,6 +109,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + return -EINVAL; +} +static inline int futex_hash_allocate_default(void) +{ + return 0; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } + #endif #endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6fa0a268d538..097be89487bf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c..8adc8e9cb4a7 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -31,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..c698f8415675 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1adcba3ddd76..1ef867bb8c44 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -345,7 +345,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; #endif - timer->function = function; + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f..4861a7e304bb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba..b52ac40d5830 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1002,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ @@ -1161,13 +1167,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..457b4fba88bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/include/linux/io.h b/include/linux/io.h index 6a6bc4d46d0a..0642c7ee41db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, resource_size_t size); +#ifdef CONFIG_STRICT_DEVMEM +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#endif #endif /* _LINUX_IO_H */ diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 0634a3de1782..53408124c1e5 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur return cmd_to_io_kiocb(cmd)->async_data; } +/* + * Return uring_cmd's context reference as its context handle for driver to + * track per-context resource, such as registered kernel IO buffer + */ +static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->ctx; +} + int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, void (*release)(void *), unsigned int index, unsigned int issue_flags); diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b44d201520d8..2922635986f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -40,8 +40,6 @@ enum io_uring_cmd_flags { IO_URING_F_TASK_DEAD = (1 << 13), }; -struct io_zcrx_ifq; - struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -343,7 +341,6 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - unsigned cq_extra; void *cq_wait_arg; size_t cq_wait_size; @@ -394,7 +391,8 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; - struct io_zcrx_ifq *ifq; + /* Stores zcrx object pointers of type struct io_zcrx_ifq */ + struct xarray zcrx_ctxs; u32 pers_next; struct xarray personalities; @@ -418,6 +416,7 @@ struct io_ring_ctx { struct callback_head poll_wq_task_work; struct list_head defer_list; + unsigned nr_drained; struct io_alloc_cache msg_cache; spinlock_t msg_lock; @@ -436,6 +435,7 @@ struct io_ring_ctx { /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + unsigned nr_req_allocated; /* * Protection for resize vs mmap races - both the mmap and resize @@ -448,8 +448,6 @@ struct io_ring_ctx { struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; - /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */ - struct io_mapped_region zcrx_region; }; /* @@ -653,8 +651,7 @@ struct io_kiocb { u8 iopoll_completed; /* * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. + * For the latter, it points to the selected buffer ID. */ u16 buf_index; @@ -713,7 +710,7 @@ struct io_kiocb { const struct cred *creds; struct io_wq_work work; - struct { + struct io_big_cqe { u64 extra1; u64 extra2; } big_cqe; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..3a8d35d41fda 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac98..000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include <linux/ioport.h> - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5438a1b446a6..291d49b9bf05 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h index 013794fb5da0..065c185f2763 100644 --- a/include/linux/livepatch_sched.h +++ b/include/linux/livepatch_sched.h @@ -3,27 +3,23 @@ #define _LINUX_LIVEPATCH_SCHED_H_ #include <linux/jump_label.h> -#include <linux/static_call_types.h> +#include <linux/sched.h> #ifdef CONFIG_LIVEPATCH void __klp_sched_try_switch(void); -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -static __always_inline void klp_sched_try_switch(void) +static __always_inline void klp_sched_try_switch(struct task_struct *curr) { - if (static_branch_unlikely(&klp_sched_try_switch_key)) + if (static_branch_unlikely(&klp_sched_try_switch_key) && + READ_ONCE(curr->__state) & TASK_FREEZABLE) __klp_sched_try_switch(); } -#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - #else /* !CONFIG_LIVEPATCH */ -static inline void klp_sched_try_switch(void) {} -static inline void __klp_sched_try_switch(void) {} +static inline void klp_sched_try_switch(struct task_struct *curr) {} #endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_SCHED_H_ */ diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 1a0bc35839e3..16a2ee4f8310 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -52,44 +52,23 @@ __local_unlock_irqrestore(lock, flags) /** - * localtry_lock_init - Runtime initialize a lock instance - */ -#define localtry_lock_init(lock) __localtry_lock_init(lock) - -/** - * localtry_lock - Acquire a per CPU local lock - * @lock: The lock variable - */ -#define localtry_lock(lock) __localtry_lock(lock) - -/** - * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts - * @lock: The lock variable - */ -#define localtry_lock_irq(lock) __localtry_lock_irq(lock) - -/** - * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable - * interrupts - * @lock: The lock variable - * @flags: Storage for interrupt flags + * local_lock_init - Runtime initialize a lock instance */ -#define localtry_lock_irqsave(lock, flags) \ - __localtry_lock_irqsave(lock, flags) +#define local_trylock_init(lock) __local_trylock_init(lock) /** - * localtry_trylock - Try to acquire a per CPU local lock. + * local_trylock - Try to acquire a per CPU local lock * @lock: The lock variable * * The function can be used in any context such as NMI or HARDIRQ. Due to * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock(lock) __localtry_trylock(lock) +#define local_trylock(lock) __local_trylock(lock) /** - * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable - * interrupts if acquired + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired * @lock: The lock variable * @flags: Storage for interrupt flags * @@ -97,29 +76,8 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock_irqsave(lock, flags) \ - __localtry_trylock_irqsave(lock, flags) - -/** - * local_unlock - Release a per CPU local lock - * @lock: The lock variable - */ -#define localtry_unlock(lock) __localtry_unlock(lock) - -/** - * local_unlock_irq - Release a per CPU local lock and enable interrupts - * @lock: The lock variable - */ -#define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) - -/** - * localtry_unlock_irqrestore - Release a per CPU local lock and restore - * interrupt flags - * @lock: The lock variable - * @flags: Interrupt flags to restore - */ -#define localtry_unlock_irqrestore(lock, flags) \ - __localtry_unlock_irqrestore(lock, flags) +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 67bd13d142fa..8d5ac16a9b17 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,10 +15,11 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { local_lock_t llock; - unsigned int acquired; -} localtry_lock_t; + u8 acquired; +} local_trylock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -29,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } @@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -76,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -87,149 +94,117 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + __percpu local_lock_t *: (void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - preempt_enable(); \ + __local_lock_acquire(lock); \ } while (0) -#define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_enable(); \ - } while (0) - -#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) - -#define __local_lock_nested_bh(lock) \ - do { \ - lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) - -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) \ -do { \ - __local_lock_init(&(lock)->llock); \ - WRITE_ONCE((lock)->acquired, 0); \ -} while (0) - -#define __localtry_lock(lock) \ - do { \ - localtry_lock_t *lt; \ - preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irq(lock) \ - do { \ - localtry_lock_t *lt; \ - local_irq_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irqsave(lock, flags) \ - do { \ - localtry_lock_t *lt; \ - local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_unlock(lock) \ +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + __percpu local_lock_t *: (void)0); \ + } while (0) + +#define __local_unlock(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) -#define __localtry_unlock_irq(lock) \ +#define __local_unlock_irq(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) -#define __localtry_unlock_irqrestore(lock, flags) \ +#define __local_unlock_irqrestore(lock, flags) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -237,16 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; -typedef spinlock_t localtry_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -283,17 +260,7 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) __local_lock_init(lock) -#define __localtry_lock(lock) __local_lock(lock) -#define __localtry_lock_irq(lock) __local_lock(lock) -#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) -#define __localtry_unlock(lock) __local_unlock(lock) -#define __localtry_unlock_irq(lock) __local_unlock(lock) -#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ int __locked; \ \ @@ -308,11 +275,11 @@ do { \ __locked; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ typecheck(unsigned long, flags); \ flags = 0; \ - __localtry_trylock(lock); \ + __local_trylock(lock); \ }) #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc..9af01bdd86d2 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC diff --git a/include/linux/mm.h b/include/linux/mm.h index b7f13f087954..fdda6b16263b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE @@ -1218,6 +1218,23 @@ static inline unsigned int folio_order(const struct folio *folio) return folio_large_order(folio); } +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif +} + #include <linux/huge_mm.h> /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 56d07edd01f9..32ba5126e221 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -31,6 +31,7 @@ #define INIT_PASID 0 struct address_space; +struct futex_private_hash; struct mem_cgroup; /* @@ -1031,7 +1032,11 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif - +#ifdef CONFIG_FUTEX_PRIVATE_HASH + struct mutex futex_hash_lock; + struct futex_private_hash __rcu *futex_phash; + struct futex_private_hash *futex_phash_new; +#endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ diff --git a/include/linux/mman.h b/include/linux/mman.h index bce214fece16..f4c6346a8fcd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 4706c6769902..e0eddfd306ef 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -7,6 +7,7 @@ #include <linux/rwsem.h> #include <linux/tracepoint-defs.h> #include <linux/types.h> +#include <linux/cleanup.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } +DEFINE_GUARD(mmap_read_lock, struct mm_struct *, + mmap_read_lock(_T), mmap_read_unlock(_T)) + static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 25e80b2ca7f4..b1c459f7a485 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -148,7 +148,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ - NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -967,6 +966,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ @@ -1499,8 +1501,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. diff --git a/include/linux/module.h b/include/linux/module.h index d94b196d5a34..8050f77c3b64 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -584,6 +586,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959..6904ad33ee7a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,51 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b55..0075f6e5c3da 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 1e748958dad4..311f145eb4e8 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -67,7 +67,7 @@ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__))) + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf4..5d085428e471 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, @@ -69,17 +70,16 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); -extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); -struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); +extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *); +struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, - const char *name, struct dentry *base, - int len); + struct qstr *name, struct dentry *base); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, - const char *name, - struct dentry *base, int len); + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); diff --git a/include/linux/net.h b/include/linux/net.h index 0ff950eecc6b..f60fff91e1df 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -70,6 +70,7 @@ enum sock_type { SOCK_DCCP = 6, SOCK_PACKET = 10, }; +#endif /* ARCH_HAS_SOCKET_TYPES */ #define SOCK_MAX (SOCK_PACKET + 1) /* Mask which covers at least up to SOCK_MASK-1. The @@ -81,8 +82,7 @@ enum sock_type { #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif - -#endif /* ARCH_HAS_SOCKET_TYPES */ +#define SOCK_COREDUMP O_NOCTTY /** * enum sock_shutdown_cmd - Shutdown types diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817b..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4971,9 +4972,11 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb..0906a0b40c6a 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 71319637a84e..ee03f3cef30c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -213,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -236,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2479ed10f53e..51308f65b72f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -303,6 +303,7 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), NVME_CTRL_ATTR_RHII = (1 << 18), + NVME_CTRL_ATTR_FDPS = (1 << 19), }; struct nvme_id_ctrl { @@ -689,6 +690,44 @@ struct nvme_rotational_media_log { __u8 rsvd24[488]; }; +struct nvme_fdp_config { + __u8 flags; +#define FDPCFG_FDPE (1U << 0) + __u8 fdpcidx; + __le16 reserved; +}; + +struct nvme_fdp_ruh_desc { + __u8 ruht; + __u8 reserved[3]; +}; + +struct nvme_fdp_config_desc { + __le16 dsze; + __u8 fdpa; + __u8 vss; + __le32 nrg; + __le16 nruh; + __le16 maxpids; + __le32 nns; + __le64 runs; + __le32 erutl; + __u8 rsvd28[36]; + struct nvme_fdp_ruh_desc ruhs[]; +}; + +struct nvme_fdp_config_log { + __le16 numfdpc; + __u8 ver; + __u8 rsvd3; + __le32 sze; + __u8 rsvd8[8]; + /* + * This is followed by variable number of nvme_fdp_config_desc + * structures, but sparse doesn't like nested variable sized arrays. + */ +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -915,6 +954,7 @@ enum nvme_opcode { nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, + nvme_cmd_io_mgmt_recv = 0x12, nvme_cmd_resv_release = 0x15, nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, @@ -936,6 +976,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_io_mgmt_recv), \ nvme_opcode_name(nvme_cmd_resv_release), \ nvme_opcode_name(nvme_cmd_zone_mgmt_send), \ nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \ @@ -1087,6 +1128,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_RW_DTYPE_DPLCMT = 2 << 4, NVME_WZ_DEAC = 1 << 9, }; @@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd { __le32 cdw14[2]; }; +struct nvme_io_mgmt_recv_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 mo; + __u8 rsvd11; + __u16 mos; + __le32 numd; + __le32 cdw12[4]; +}; + +enum { + NVME_IO_MGMT_RECV_MO_RUHS = 1, +}; + +struct nvme_fdp_ruh_status_desc { + __le16 pid; + __le16 ruhid; + __le32 earutr; + __le64 ruamw; + __u8 reserved[16]; +}; + +struct nvme_fdp_ruh_status { + __u8 rsvd0[14]; + __le16 nruhsd; + struct nvme_fdp_ruh_status_desc ruhsd[]; +}; + enum { NVME_ZRA_ZONE_REPORT = 0, NVME_ZRASF_ZONE_REPORT_ALL = 0, @@ -1309,6 +1383,7 @@ enum { NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SANITIZE = 0x17, + NVME_FEAT_FDP = 0x1d, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1329,6 +1404,7 @@ enum { NVME_LOG_ANA = 0x0c, NVME_LOG_FEATURES = 0x12, NVME_LOG_RMI = 0x16, + NVME_LOG_FDP_CONFIGS = 0x20, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1923,6 +1999,7 @@ struct nvme_command { struct nvmf_auth_receive_command auth_receive; struct nvme_dbbuf dbbuf; struct nvme_directive_cmd directive; + struct nvme_io_mgmt_recv_cmd imr; }; }; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcce..3b814ce08331 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { diff --git a/include/linux/panic.h b/include/linux/panic.h index 2494d51707ef..4adc65766935 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; extern int panic_on_warn; extern unsigned long panic_on_taint; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index c5e9cac0575e..eeeff2a04529 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value) #define part_stat_local_read_cpu(part, field, cpu) \ local_read(&(part_stat_get_cpu(part, field, cpu))) +unsigned int bdev_count_inflight(struct block_device *part); + #endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..51e2bd6405cd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index af7d75ede619..288f5235649a 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \ #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem, + bool freezable) { might_sleep(); @@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - __percpu_down_read(sem, false); /* Unconditional memory barrier */ + __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. @@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_internal(sem, false); +} + +static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem, + bool freeze) +{ + percpu_down_read_internal(sem, freeze); +} + static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; @@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f..85bf8dd9f087 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5a9bf15d4461..52dc7cfab0e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,18 +26,9 @@ # include <asm/local64.h> #endif -#define PERF_GUEST_ACTIVE 0x01 -#define PERF_GUEST_USER 0x02 - -struct perf_guest_info_callbacks { - unsigned int (*state)(void); - unsigned long (*get_ip)(void); - unsigned int (*handle_intel_pt_intr)(void); -}; - #ifdef CONFIG_HAVE_HW_BREAKPOINT -#include <linux/rhashtable-types.h> -#include <asm/hw_breakpoint.h> +# include <linux/rhashtable-types.h> +# include <asm/hw_breakpoint.h> #endif #include <linux/list.h> @@ -62,19 +53,20 @@ struct perf_guest_info_callbacks { #include <linux/security.h> #include <linux/static_call.h> #include <linux/lockdep.h> + #include <asm/local.h> struct perf_callchain_entry { - __u64 nr; - __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ + u64 nr; + u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { - struct perf_callchain_entry *entry; - u32 max_stack; - u32 nr; - short contexts; - bool contexts_maxed; + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, @@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { - __u64 nr; - __u64 hw_idx; + u64 nr; + u64 hw_idx; struct perf_branch_entry entries[]; }; @@ -132,10 +124,10 @@ struct task_struct; * extra PMU register associated with an event */ struct hw_perf_event_extra { - u64 config; /* register value */ - unsigned int reg; /* register address or index */ - int alloc; /* extra register already allocated */ - int idx; /* index in shared_regs->regs[] */ + u64 config; /* register value */ + unsigned int reg; /* register address or index */ + int alloc; /* extra register already allocated */ + int idx; /* index in shared_regs->regs[] */ }; /** @@ -144,8 +136,8 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x000fffff -#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +#define PERF_EVENT_FLAG_ARCH 0x0fffffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); @@ -157,7 +149,9 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 config1; u64 last_tag; + u64 dyn_constraint; unsigned long config_base; unsigned long event_base; int event_base_rdpmc; @@ -225,9 +219,14 @@ struct hw_perf_event { /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 + +/* the counter is stopped */ +#define PERF_HES_STOPPED 0x01 + +/* event->count up-to-date */ +#define PERF_HES_UPTODATE 0x02 + +#define PERF_HES_ARCH 0x04 int state; @@ -276,7 +275,7 @@ struct hw_perf_event { */ u64 freq_time_stamp; u64 freq_count_stamp; -#endif +#endif /* CONFIG_PERF_EVENTS */ }; struct perf_event; @@ -285,28 +284,33 @@ struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ -#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ + +/* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_ADD 0x1 + +/* txn to read event group from PMU */ +#define PERF_PMU_TXN_READ 0x2 /** * pmu::capabilities flags */ -#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 -#define PERF_PMU_CAP_NO_NMI 0x0002 -#define PERF_PMU_CAP_AUX_NO_SG 0x0004 -#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 -#define PERF_PMU_CAP_EXCLUSIVE 0x0010 -#define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 -#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 +#define PERF_PMU_CAP_NO_NMI 0x0002 +#define PERF_PMU_CAP_AUX_NO_SG 0x0004 +#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 +#define PERF_PMU_CAP_EXCLUSIVE 0x0010 +#define PERF_PMU_CAP_ITRACE 0x0020 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 /** * pmu::scope */ enum perf_pmu_scope { - PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_NONE = 0, PERF_PMU_SCOPE_CORE, PERF_PMU_SCOPE_DIE, PERF_PMU_SCOPE_CLUSTER, @@ -325,6 +329,9 @@ struct perf_output_handle; struct pmu { struct list_head entry; + spinlock_t events_lock; + struct list_head events; + struct module *module; struct device *dev; struct device *parent; @@ -387,11 +394,21 @@ struct pmu { * Flags for ->add()/->del()/ ->start()/->stop(). There are * matching hw_perf_event::state flags. */ -#define PERF_EF_START 0x01 /* start the counter when adding */ -#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ -#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ -#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ -#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ + +/* start the counter when adding */ +#define PERF_EF_START 0x01 + +/* reload the counter when starting */ +#define PERF_EF_RELOAD 0x02 + +/* update the counter when stopping */ +#define PERF_EF_UPDATE 0x04 + +/* AUX area event, pause tracing */ +#define PERF_EF_PAUSE 0x08 + +/* AUX area event, resume tracing */ +#define PERF_EF_RESUME 0x10 /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -590,10 +607,10 @@ enum perf_addr_filter_action_t { * This is a hardware-agnostic filter configuration as specified by the user. */ struct perf_addr_filter { - struct list_head entry; - struct path path; - unsigned long offset; - unsigned long size; + struct list_head entry; + struct path path; + unsigned long offset; + unsigned long size; enum perf_addr_filter_action_t action; }; @@ -608,23 +625,24 @@ struct perf_addr_filter { * bundled together; see perf_event_addr_filters(). */ struct perf_addr_filters_head { - struct list_head list; - raw_spinlock_t lock; - unsigned int nr_file_filters; + struct list_head list; + raw_spinlock_t lock; + unsigned int nr_file_filters; }; struct perf_addr_filter_range { - unsigned long start; - unsigned long size; + unsigned long start; + unsigned long size; }; /** * enum perf_event_state - the states of an event: */ enum perf_event_state { - PERF_EVENT_STATE_DEAD = -4, - PERF_EVENT_STATE_EXIT = -3, - PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_DEAD = -5, + PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */ + PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */ + PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */ PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, PERF_EVENT_STATE_ACTIVE = 1, @@ -662,24 +680,24 @@ struct swevent_hlist { struct rcu_head rcu_head; }; -#define PERF_ATTACH_CONTEXT 0x0001 -#define PERF_ATTACH_GROUP 0x0002 -#define PERF_ATTACH_TASK 0x0004 -#define PERF_ATTACH_TASK_DATA 0x0008 -#define PERF_ATTACH_GLOBAL_DATA 0x0010 -#define PERF_ATTACH_SCHED_CB 0x0020 -#define PERF_ATTACH_CHILD 0x0040 -#define PERF_ATTACH_EXCLUSIVE 0x0080 -#define PERF_ATTACH_CALLCHAIN 0x0100 -#define PERF_ATTACH_ITRACE 0x0200 +#define PERF_ATTACH_CONTEXT 0x0001 +#define PERF_ATTACH_GROUP 0x0002 +#define PERF_ATTACH_TASK 0x0004 +#define PERF_ATTACH_TASK_DATA 0x0008 +#define PERF_ATTACH_GLOBAL_DATA 0x0010 +#define PERF_ATTACH_SCHED_CB 0x0020 +#define PERF_ATTACH_CHILD 0x0040 +#define PERF_ATTACH_EXCLUSIVE 0x0080 +#define PERF_ATTACH_CALLCHAIN 0x0100 +#define PERF_ATTACH_ITRACE 0x0200 struct bpf_prog; struct perf_cgroup; struct perf_buffer; struct pmu_event_list { - raw_spinlock_t lock; - struct list_head list; + raw_spinlock_t lock; + struct list_head list; }; /* @@ -689,12 +707,12 @@ struct pmu_event_list { * disabled is sufficient since it will hold-off the IPIs. */ #ifdef CONFIG_PROVE_LOCKING -#define lockdep_assert_event_ctx(event) \ +# define lockdep_assert_event_ctx(event) \ WARN_ON_ONCE(__lockdep_enabled && \ (this_cpu_read(hardirqs_enabled) && \ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) #else -#define lockdep_assert_event_ctx(event) +# define lockdep_assert_event_ctx(event) #endif #define for_each_sibling_event(sibling, event) \ @@ -823,7 +841,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; @@ -853,9 +870,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; struct event_filter *filter; -#ifdef CONFIG_FUNCTION_TRACER +# ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; -#endif +# endif #endif #ifdef CONFIG_CGROUP_PERF @@ -866,6 +883,7 @@ struct perf_event { void *security; #endif struct list_head sb_list; + struct list_head pmu_list; /* * Certain events gets forwarded to another pmu internally by over- @@ -873,7 +891,7 @@ struct perf_event { * of it. event->orig_type contains original 'type' requested by * user. */ - __u32 orig_type; + u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; @@ -938,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) } struct perf_event_groups { - struct rb_root tree; - u64 index; + struct rb_root tree; + u64 index; }; @@ -1156,7 +1174,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); -extern void perf_pmu_unregister(struct pmu *pmu); +extern int perf_pmu_unregister(struct pmu *pmu); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); @@ -1182,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu); extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); + extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, - int cpu, - struct task_struct *task, - perf_overflow_handler_t callback, - void *context); + int cpu, + struct task_struct *task, + perf_overflow_handler_t callback, + void *context); + extern void perf_pmu_migrate_context(struct pmu *pmu, - int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value, - u64 *enabled, u64 *running); + int src_cpu, int dst_cpu); +extern int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1408,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data, */ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) { - br->mispred = 0; - br->predicted = 0; - br->in_tx = 0; - br->abort = 0; - br->cycles = 0; - br->type = 0; - br->spec = PERF_BR_SPEC_NA; - br->reserved = 0; + br->mispred = 0; + br->predicted = 0; + br->in_tx = 0; + br->abort = 0; + br->cycles = 0; + br->type = 0; + br->spec = PERF_BR_SPEC_NA; + br->reserved = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1604,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + +struct perf_guest_info_callbacks { + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); +}; + #ifdef CONFIG_GUEST_PERF_EVENTS + extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); @@ -1615,21 +1645,27 @@ static inline unsigned int perf_guest_state(void) { return static_call(__perf_guest_state)(); } + static inline unsigned long perf_guest_get_ip(void) { return static_call(__perf_guest_get_ip)(); } + static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return static_call(__perf_guest_handle_intel_pt_intr)(); } + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -#else + +#else /* !CONFIG_GUEST_PERF_EVENTS: */ + static inline unsigned int perf_guest_state(void) { return 0; } static inline unsigned long perf_guest_get_ip(void) { return 0; } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } -#endif /* CONFIG_GUEST_PERF_EVENTS */ + +#endif /* !CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1659,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx * { if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->contexts; return 0; @@ -1672,6 +1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 { if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->nr; return 0; @@ -1698,7 +1736,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -int perf_allow_kernel(void); +extern int perf_allow_kernel(void); static inline int perf_allow_cpu(void) { @@ -1761,7 +1799,7 @@ static inline bool needs_branch_stack(struct perf_event *event) static inline bool has_aux(struct perf_event *event) { - return event->pmu->setup_aux; + return event->pmu && event->pmu->setup_aux; } static inline bool has_aux_action(struct perf_event *event) @@ -1820,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); + const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, @@ -1837,7 +1875,9 @@ extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); + #else /* !CONFIG_PERF_EVENTS: */ + static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } @@ -1915,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } -static inline int perf_event_period(struct perf_event *event, u64 value) -{ - return -EINVAL; -} -static inline u64 perf_event_pause(struct perf_event *event, bool reset) -{ - return 0; -} -static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) -{ - return 0; -} -#endif +static inline int +perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } +static inline u64 +perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int +perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; } + +#endif /* !CONFIG_PERF_EVENTS */ #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); @@ -1935,31 +1970,31 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif -#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; - const char *event_str; + struct device_attribute attr; + u64 id; + const char *event_str; }; struct perf_pmu_events_ht_attr { - struct device_attribute attr; - u64 id; - const char *event_str_ht; - const char *event_str_noht; + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; }; struct perf_pmu_events_hybrid_attr { - struct device_attribute attr; - u64 id; - const char *event_str; - u64 pmu_type; + struct device_attribute attr; + u64 id; + const char *event_str; + u64 pmu_type; }; struct perf_pmu_format_hybrid_attr { - struct device_attribute attr; - u64 pmu_type; + struct device_attribute attr; + u64 pmu_type; }; ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, @@ -2001,11 +2036,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) /* Performance counter hotplug functions */ #ifdef CONFIG_PERF_EVENTS -int perf_event_init_cpu(unsigned int cpu); -int perf_event_exit_cpu(unsigned int cpu); +extern int perf_event_init_cpu(unsigned int cpu); +extern int perf_event_exit_cpu(unsigned int cpu); #else -#define perf_event_init_cpu NULL -#define perf_event_exit_cpu NULL +# define perf_event_init_cpu NULL +# define perf_event_exit_cpu NULL #endif extern void arch_perf_update_userpage(struct perf_event *event, diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index c74077977830..8a7f4f802c57 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; +} + void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e2b705c14945..b50447ef1c92 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1511,8 +1511,9 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). On success, stores the pfn to be - * passed to untrack_pfn_copy(). + * tables copied during copy_page_range(). Will store the pfn to be + * passed to untrack_pfn_copy() only if there is something to be untracked. + * Callers should initialize the pfn to 0. */ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn) @@ -1522,7 +1523,9 @@ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, /* * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. + * copy_page_range(), but after track_pfn_copy() was already called. Can + * be called even if track_pfn_copy() did not actually track anything: + * handled internally. */ static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1f5773ab5660..30659b615fca 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -361,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. + * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -385,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -397,9 +406,9 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. * - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, diff --git a/include/linux/pid.h b/include/linux/pid.h index 311ecebd7d56..453ae6d8a68d 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,7 +77,7 @@ struct file; struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file); void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 05e6f8f4a026..77e7db194914 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -2,11 +2,19 @@ #ifndef _LINUX_PID_FS_H #define _LINUX_PID_FS_H +struct coredump_params; + struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); void pidfs_exit(struct task_struct *tsk); +#ifdef CONFIG_COREDUMP +void pidfs_coredump(const struct coredump_params *cprm); +#endif extern const struct dentry_operations pidfs_dentry_operations; +int pidfs_register_pid(struct pid *pid); +void pidfs_get_pid(struct pid *pid); +void pidfs_put_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h index 6e603a8c075f..1d34435b7001 100644 --- a/include/linux/platform_data/x86/intel_pmc_ipc.h +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -36,6 +36,7 @@ struct pmc_ipc_rbuf { */ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) { +#ifdef CONFIG_ACPI struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object params[PMC_IPCS_PARAM_COUNT] = { {.type = ACPI_TYPE_INTEGER,}, @@ -89,6 +90,9 @@ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf } return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ } #endif /* INTEL_PMC_IPC_H */ diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index f3cad182d4ef..0b3a36bdaa90 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -954,6 +954,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret); void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); +void sev_platform_shutdown(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -988,6 +989,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask) static inline void snp_free_firmware_page(void *addr) { } +static inline void sev_platform_shutdown(void) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 6322d8c1c6b4..2fb2af6d9824 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) * rcuref_read - Read the number of held reference counts of a rcuref * @ref: Pointer to the reference count * - * Return: The number of held references (0 ... N) + * Return: The number of held references (0 ... N). The value 0 does not + * indicate that it is safe to schedule the object, protected by this reference + * counter, for deconstruction. + * If you want to know if the reference counter has been marked DEAD (as + * signaled by rcuref_put()) please use rcuread_is_dead(). */ static inline unsigned int rcuref_read(rcuref_t *ref) { @@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref) return c >= RCUREF_RELEASED ? 0 : c + 1; } +/** + * rcuref_is_dead - Check if the rcuref has been already marked dead + * @ref: Pointer to the reference count + * + * Return: True if the object has been marked DEAD. This signals that a previous + * invocation of rcuref_put() returned true on this reference counter meaning + * the protected object can safely be scheduled for deconstruction. + * Otherwise, returns false. + */ +static inline bool rcuref_is_dead(rcuref_t *ref) +{ + unsigned int c = atomic_read(&ref->refcnt); + + return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF); +} + extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); /** diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 13f17676c5f4..7e50bbc94e47 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -26,7 +26,7 @@ struct restart_block { unsigned long arch_data; long (*fn)(struct restart_block *); union { - /* For futex_wait and futex_wait_requeue_pi */ + /* For futex_wait() */ struct { u32 __user *uaddr; u32 val; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ccaaf4c7d5f6..ea39dd23a197 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..45e5953b8f32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -44,7 +44,6 @@ #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> -#include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> #include <asm/kmap_size.h> @@ -1646,22 +1645,15 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif - /* - * New fields for task_struct should be added above here, so that - * they are included in the randomized portion of task_struct. - */ - randomized_struct_fields_end - /* CPU-specific state of this task: */ struct thread_struct thread; /* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. */ -}; + randomized_struct_fields_end +} __attribute__ ((aligned (64))); #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -2089,9 +2081,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2112,7 +2101,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2122,7 +2110,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 7b4301b7235f..198bb5cc1774 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -195,6 +195,8 @@ struct sched_domain_topology_level { }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); +extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); + # define SD_INIT_NAME(type) .name = #type @@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu) return true; } +static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 0b273a7b9f01..5f03a39a26f7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping) return false; } #endif /* CONFIG_SHMEM */ -extern void shmem_unlock_mapping(struct address_space *mapping); -extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, +void shmem_unlock_mapping(struct address_space *mapping); +struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); +int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e472..dc6ebaee3d18 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace4..6e64f0193777 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -249,10 +249,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) diff --git a/include/linux/stat.h b/include/linux/stat.h index be7496a6a0dd..e3d00e7bb26d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -57,6 +57,7 @@ struct kstat { u32 dio_read_offset_align; u32 atomic_write_unit_min; u32 atomic_write_unit_max; + u32 atomic_write_unit_max_opt; u32 atomic_write_segments_max; }; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b19..785048a3b3e6 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 6c3125300c00..a3d8305e88a5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. + */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4d16c13d0df5..64cb4b04be7a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -220,6 +220,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * occurs. * @reset_done: optional function to call after transport specific reset * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. */ struct virtio_driver { struct device_driver driver; @@ -237,6 +239,7 @@ struct virtio_driver { int (*restore)(struct virtio_device *dev); int (*reset_prepare)(struct virtio_device *dev); int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..a40a905e5e1b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { @@ -168,8 +169,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) -void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) +void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1); +#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__)) + +static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +{ + return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE); +} extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a8586c3058c7..797992019f9e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1931,6 +1931,8 @@ struct hci_cp_le_pa_create_sync { __u8 sync_cte_type; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045 + #define HCI_OP_LE_PA_TERM_SYNC 0x2046 struct hci_cp_le_pa_term_sync { __le16 handle; @@ -2830,7 +2832,7 @@ struct hci_evt_le_create_big_complete { __le16 bis_handle[]; } __packed; -#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d struct hci_evt_le_big_sync_estabilished { __u8 status; __u8 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5115da34f881..54bfeeaa0995 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1113,10 +1113,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, - __u8 sid, - bdaddr_t *dst, - __u8 dst_type) +static inline struct hci_conn * +hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1124,8 +1122,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || bacmp(&c->dst, dst) || - c->dst_type != dst_type || c->sid != sid) + if (c->type != ISO_LINK) + continue; + + if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) continue; rcu_read_unlock(); @@ -1524,8 +1524,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_pa_create_sync_pending(struct hci_dev *hdev); -int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -1566,9 +1564,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]); +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1800,6 +1798,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); +u8 *hci_conn_key_enc_size(struct hci_conn *conn); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 7e2cf0cca939..72558c826aa1 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -185,3 +185,6 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 5927910ec06e..6e68e359ad18 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -45,6 +45,8 @@ struct fib_rule { struct fib_rule_port_range dport_range; u16 sport_mask; u16 dport_mask; + u8 iif_is_l3_master; + u8 oif_is_l3_master; struct rcu_head rcu; }; diff --git a/include/net/flow.h b/include/net/flow.h index 335bbc52171c..2a3f0c42f092 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,6 +38,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_L3MDEV_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f7fe796e8429..1eb8dad18f7e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && + fl->flowi_l3mdev == iifindex; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && + fl->flowi_l3mdev == oifindex; +} + void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); @@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } + +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return false; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return false; +} + static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..0ee5bc766810 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e5..d01c82983b4d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd..1c05fed05f2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31248cfdfb23..dcd288fa1bb6 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,6 +775,7 @@ struct sctp_transport { /* Reference counting. */ refcount_t refcnt; + __u32 dead:1, /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -784,7 +785,7 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - __u32 rto_pending:1, + rto_pending:1, /* * hb_sent : a flag that signals that we have a pending diff --git a/include/net/sock.h b/include/net/sock.h index 8daf1b3b12c6..694f954258d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -339,6 +339,8 @@ struct sk_filter; * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. + * @sk_owner: reference to the real owner of the socket that calls + * sock_lock_init_class_and_name(). */ struct sock { /* @@ -547,6 +549,10 @@ struct sock { struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) + struct module *sk_owner; +#endif }; struct sock_bh_locked { @@ -1583,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ + __module_get(owner); + sk->sk_owner = owner; +} + +static inline void sk_owner_clear(struct sock *sk) +{ + sk->sk_owner = NULL; +} + +static inline void sk_owner_put(struct sock *sk) +{ + module_put(sk->sk_owner); +} +#else +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ +} + +static inline void sk_owner_clear(struct sock *sk) +{ +} + +static inline void sk_owner_put(struct sock *sk) +{ +} +#endif /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1592,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ + sk_owner_set(sk, THIS_MODULE); \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ - sizeof((sk)->sk_lock)); \ + sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index a58ae7589d12..e8bd6ddb7b12 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -71,9 +71,6 @@ struct xdp_sock { */ u32 tx_budget_spent; - /* Protects generic receive. */ - spinlock_t rx_lock; - /* Statistics */ u64 rx_dropped; u64 rx_queue_full; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea17..06ab2a3d2ebd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -236,7 +236,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1dcd4d71468a..cac56e6b0869 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -53,6 +53,8 @@ struct xsk_buff_pool { refcount_t users; struct xdp_umem *umem; struct work_struct work; + /* Protects generic receive in shared and non-shared umem mode. */ + spinlock_t rx_lock; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; @@ -238,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, return orig_addr; offset = xskb->xdp.data - xskb->xdp.data_hard_start; - orig_addr -= offset; offset += pool->headroom; + orig_addr -= offset; return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..901353796fbb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 26bc23419cfd..c53812b9026f 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -670,8 +670,6 @@ struct Scsi_Host { /* The transport requires the LUN bits NOT to be stored in CDB[1] */ unsigned no_scsi2_lun_in_cdb:1; - unsigned no_highmem:1; - /* * Optional work queue to be utilized by the transport */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 8becb4504887..8582d22f3818 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1404,6 +1404,8 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s #define snd_pcm_lib_mmap_iomem NULL #endif +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime); + /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 36a4a1e1d8ca..d8bd5d37131a 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -226,6 +226,7 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f849..682499b871ea 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h index 72f60ddfea75..9556b4755a1e 100644 --- a/include/sound/ump_msg.h +++ b/include/sound/ump_msg.h @@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info { } __packed; /* UMP Stream Message: Device Info Notification (128bit) */ -struct snd_ump_stream_msg_devince_info { +struct snd_ump_stream_msg_device_info { #ifdef __BIG_ENDIAN_BITFIELD /* 0 */ u32 type:4; @@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name { union snd_ump_stream_msg { struct snd_ump_stream_msg_ep_discovery ep_discovery; struct snd_ump_stream_msg_ep_info ep_info; - struct snd_ump_stream_msg_devince_info device_info; + struct snd_ump_stream_msg_device_info device_info; struct snd_ump_stream_msg_stream_cfg stream_cfg; struct snd_ump_stream_msg_fb_discovery fb_discovery; struct snd_ump_stream_msg_fb_info fb_info; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bd0ea07338eb..14a924c0e303 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -11,7 +11,7 @@ #include <linux/tracepoint.h> #include <uapi/linux/ioprio.h> -#define RWBS_LEN 8 +#define RWBS_LEN 9 #define IOPRIO_CLASS_STRINGS \ { IOPRIO_CLASS_NONE, "none" }, \ @@ -361,21 +361,6 @@ DECLARE_EVENT_CLASS(block_bio, ); /** - * block_bio_bounce - used bounce buffer when processing block operation - * @bio: block operation - * - * A bounce buffer was used to handle the block operation @bio in @q. - * This occurs when hardware limitations prevent a direct transfer of - * data between the @bio data memory area and the IO device. Use of a - * bounce buffer requires extra copying of data and decreases - * performance. - */ -DEFINE_EVENT(block_bio, block_bio_bounce, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); - -/** * block_bio_backmerge - merging block operation to the end of an existing operation * @bio: new block operation to merge * diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 549ab3b41961..bebc252db865 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -143,7 +143,6 @@ FLUSH_STATES #define EXTENT_FLAGS \ { EXTENT_DIRTY, "DIRTY"}, \ - { EXTENT_UPTODATE, "UPTODATE"}, \ { EXTENT_LOCKED, "LOCKED"}, \ { EXTENT_NEW, "NEW"}, \ { EXTENT_DELALLOC, "DELALLOC"}, \ @@ -224,8 +223,7 @@ DECLARE_EVENT_CLASS(btrfs__inode, __entry->generation = BTRFS_I(inode)->generation; __entry->last_trans = BTRFS_I(inode)->last_trans; __entry->logged_trans = BTRFS_I(inode)->logged_trans; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu " @@ -297,7 +295,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->ino = btrfs_ino(inode); __entry->start = map->start; __entry->len = map->len; @@ -376,7 +374,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular, ), TP_fast_assign_btrfs(bi->root->fs_info, - __entry->root_obj = bi->root->root_key.objectid; + __entry->root_obj = btrfs_root_id(bi->root); __entry->ino = btrfs_ino(bi); __entry->isize = bi->vfs_inode.i_size; __entry->disk_isize = bi->disk_i_size; @@ -427,7 +425,7 @@ DECLARE_EVENT_CLASS( TP_fast_assign_btrfs( bi->root->fs_info, - __entry->root_obj = bi->root->root_key.objectid; + __entry->root_obj = btrfs_root_id(bi->root); __entry->ino = btrfs_ino(bi); __entry->isize = bi->vfs_inode.i_size; __entry->disk_isize = bi->disk_i_size; @@ -527,7 +525,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __entry->flags = ordered->flags; __entry->compress_type = ordered->compress_type; __entry->refs = refcount_read(&ordered->refs); - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); __entry->truncated_len = ordered->truncated_len; ), @@ -664,7 +662,7 @@ TRACE_EVENT(btrfs_finish_ordered_extent, __entry->start = start; __entry->len = len; __entry->uptodate = uptodate; - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", @@ -705,8 +703,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->writeback_index = inode->i_mapping->writeback_index; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu " @@ -750,7 +747,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, __entry->start = start; __entry->end = end; __entry->uptodate = uptodate; - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu end=%llu uptodate=%d", @@ -780,8 +777,7 @@ TRACE_EVENT(btrfs_sync_file, __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent))); __entry->datasync = datasync; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d", @@ -1052,7 +1048,7 @@ DECLARE_EVENT_CLASS(btrfs__chunk, __entry->sub_stripes = map->sub_stripes; __entry->offset = offset; __entry->size = size; - __entry->root_objectid = fs_info->chunk_root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(fs_info->chunk_root); ), TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu " @@ -1097,7 +1093,7 @@ TRACE_EVENT(btrfs_cow_block, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->buf_start = buf->start; __entry->refs = atomic_read(&buf->refs); __entry->cow_start = cow->start; @@ -1240,7 +1236,7 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, TP_ARGS(fs_info, start, len) ); -TRACE_EVENT(find_free_extent, +TRACE_EVENT(btrfs_find_free_extent, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl), @@ -1255,7 +1251,7 @@ TRACE_EVENT(find_free_extent, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1268,7 +1264,7 @@ TRACE_EVENT(find_free_extent, BTRFS_GROUP_FLAGS)) ); -TRACE_EVENT(find_free_extent_search_loop, +TRACE_EVENT(btrfs_find_free_extent_search_loop, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl), @@ -1284,7 +1280,7 @@ TRACE_EVENT(find_free_extent_search_loop, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1298,7 +1294,7 @@ TRACE_EVENT(find_free_extent_search_loop, __entry->loop) ); -TRACE_EVENT(find_free_extent_have_block_group, +TRACE_EVENT(btrfs_find_free_extent_have_block_group, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl, @@ -1318,7 +1314,7 @@ TRACE_EVENT(find_free_extent_have_block_group, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1480,7 +1476,7 @@ TRACE_EVENT(btrfs_setup_cluster, ); struct extent_state; -TRACE_EVENT(alloc_extent_state, +TRACE_EVENT(btrfs_alloc_extent_state, TP_PROTO(const struct extent_state *state, gfp_t mask, unsigned long IP), @@ -1503,7 +1499,7 @@ TRACE_EVENT(alloc_extent_state, show_gfp_flags(__entry->mask), __entry->ip) ); -TRACE_EVENT(free_extent_state, +TRACE_EVENT(btrfs_free_extent_state, TP_PROTO(const struct extent_state *state, unsigned long IP), @@ -1672,8 +1668,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, ), TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), - __entry->rootid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->rootid = btrfs_root_id(BTRFS_I(inode)->root); __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->start = start; __entry->len = len; @@ -1744,7 +1739,7 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, TP_ARGS(fs_info, rec, bytenr) ); -TRACE_EVENT(qgroup_num_dirty_extents, +TRACE_EVENT(btrfs_qgroup_num_dirty_extents, TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid, u64 num_dirty_extents), @@ -1798,7 +1793,7 @@ TRACE_EVENT(btrfs_qgroup_account_extent, __entry->nr_new_roots) ); -TRACE_EVENT(qgroup_update_counters, +TRACE_EVENT(btrfs_qgroup_update_counters, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, @@ -1827,7 +1822,7 @@ TRACE_EVENT(qgroup_update_counters, __entry->cur_old_count, __entry->cur_new_count) ); -TRACE_EVENT(qgroup_update_reserve, +TRACE_EVENT(btrfs_qgroup_update_reserve, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, s64 diff, int type), @@ -1853,7 +1848,7 @@ TRACE_EVENT(qgroup_update_reserve, __entry->cur_reserved, __entry->diff) ); -TRACE_EVENT(qgroup_meta_reserve, +TRACE_EVENT(btrfs_qgroup_meta_reserve, TP_PROTO(const struct btrfs_root *root, s64 diff, int type), @@ -1866,7 +1861,7 @@ TRACE_EVENT(qgroup_meta_reserve, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); __entry->diff = diff; __entry->type = type; ), @@ -1876,7 +1871,7 @@ TRACE_EVENT(qgroup_meta_reserve, __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff) ); -TRACE_EVENT(qgroup_meta_convert, +TRACE_EVENT(btrfs_qgroup_meta_convert, TP_PROTO(const struct btrfs_root *root, s64 diff), @@ -1888,7 +1883,7 @@ TRACE_EVENT(qgroup_meta_convert, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); __entry->diff = diff; ), @@ -1899,7 +1894,7 @@ TRACE_EVENT(qgroup_meta_convert, __entry->diff) ); -TRACE_EVENT(qgroup_meta_free_all_pertrans, +TRACE_EVENT(btrfs_qgroup_meta_free_all_pertrans, TP_PROTO(struct btrfs_root *root), @@ -1912,7 +1907,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); spin_lock(&root->qgroup_meta_rsv_lock); __entry->diff = -(s64)root->qgroup_meta_rsv_pertrans; spin_unlock(&root->qgroup_meta_rsv_lock); @@ -1928,7 +1923,7 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct prelim_ref *oldref, const struct prelim_ref *newref, u64 tree_size), - TP_ARGS(fs_info, newref, oldref, tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size), TP_STRUCT__entry_btrfs( __field( u64, root_id ) @@ -1994,7 +1989,7 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->ino = ino; __entry->mod = mod; __entry->outstanding = outstanding; @@ -2074,12 +2069,12 @@ TRACE_EVENT(btrfs_set_extent_bit, __field( unsigned, set_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; @@ -2107,12 +2102,12 @@ TRACE_EVENT(btrfs_clear_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->clear_bits = clear_bits; @@ -2141,12 +2136,12 @@ TRACE_EVENT(btrfs_convert_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; @@ -2341,11 +2336,7 @@ DEFINE_EVENT(btrfs_locking_events, name, \ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock); DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock); -DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking); -DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read); -DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write); DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock); -DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic); DECLARE_EVENT_CLASS(btrfs__space_info_update, @@ -2621,7 +2612,7 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em, TP_fast_assign_btrfs(inode->root->fs_info, __entry->ino = btrfs_ino(inode); - __entry->root_id = inode->root->root_key.objectid; + __entry->root_id = btrfs_root_id(inode->root); __entry->start = em->start; __entry->len = em->len; __entry->flags = em->flags; diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index c69c7b1e41d1..a5f4b9234f46 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -113,7 +113,7 @@ TRACE_EVENT(erofs_read_folio, __entry->raw) ); -TRACE_EVENT(erofs_readpages, +TRACE_EVENT(erofs_readahead, TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage, bool raw), diff --git a/include/trace/events/exceptions.h b/include/trace/events/exceptions.h new file mode 100644 index 000000000000..a631f8de8917 --- /dev/null +++ b/include/trace/events/exceptions.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM exceptions + +#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_FAULT_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(exceptions, + + TP_PROTO(unsigned long address, struct pt_regs *regs, + unsigned long error_code), + + TP_ARGS(address, regs, error_code), + + TP_STRUCT__entry( + __field( unsigned long, address ) + __field( unsigned long, ip ) + __field( unsigned long, error_code ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->ip = instruction_pointer(regs); + __entry->error_code = error_code; + ), + + TP_printk("address=%ps ip=%ps error_code=0x%lx", + (void *)__entry->address, (void *)__entry->ip, + __entry->error_code) ); + +DEFINE_EVENT(exceptions, page_fault_user, + TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), + TP_ARGS(address, regs, error_code)); +DEFINE_EVENT(exceptions, page_fault_kernel, + TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), + TP_ARGS(address, regs, error_code)); + +#endif /* _TRACE_PAGE_FAULT_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index fb81c533b310..178ab6f611be 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -645,7 +645,7 @@ TRACE_EVENT(io_uring_short_write, /* * io_uring_local_work_run - ran ring local task work * - * @tctx: pointer to a io_uring_ctx + * @ctx: pointer to an io_ring_ctx * @count: how many functions it ran * @loops: how many loops it ran * diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8994e97d86c1..3bec9fb73a36 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -326,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free, TP_ARGS(p)); /* - * Tracepoint for a task exiting: + * Tracepoint for a task exiting. + * Note, it's a superset of sched_process_template and should be kept + * compatible as much as possible. sched_process_exits has an extra + * `group_dead` argument, so sched_process_template can't be used, + * unfortunately, just like sched_migrate_task above. */ -DEFINE_EVENT(sched_process_template, sched_process_exit, - TP_PROTO(struct task_struct *p), - TP_ARGS(p)); +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p, bool group_dead), + + TP_ARGS(p, group_dead), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( bool, group_dead ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->group_dead = group_dead; + ), + + TP_printk("comm=%s pid=%d prio=%d group_dead=%s", + __entry->comm, __entry->pid, __entry->prio, + __entry->group_dead ? "true" : "false" + ) +); /* * Tracepoint for waiting on task to unschedule: diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h index d6db8984889f..490606d7694b 100644 --- a/include/uapi/cxl/features.h +++ b/include/uapi/cxl/features.h @@ -8,10 +8,19 @@ #define _UAPI_CXL_FEATURES_H_ #include <linux/types.h> -#ifndef __KERNEL__ -#include <uuid/uuid.h> -#else + +typedef unsigned char __uapi_uuid_t[16]; + +#ifdef __KERNEL__ #include <linux/uuid.h> +/* + * Note, __uapi_uuid_t is 1-byte aligned on modern compilers and 4-byte + * aligned on others. Ensure that __uapi_uuid_t in a struct is placed at + * a 4-byte aligned offset, or the structure is packed, to ensure + * consistent padding. + */ +static_assert(sizeof(__uapi_uuid_t) == sizeof(uuid_t)); +#define __uapi_uuid_t uuid_t #endif /* @@ -60,7 +69,7 @@ struct cxl_mbox_get_sup_feats_in { * Get Supported Features Supported Feature Entry */ struct cxl_feat_entry { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 id; __le16 get_feat_size; __le16 set_feat_size; @@ -110,7 +119,7 @@ struct cxl_mbox_get_sup_feats_out { * CXL spec r3.2 section 8.2.9.6.2 Table 8-99 */ struct cxl_mbox_get_feat_in { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 offset; __le16 count; __u8 selection; @@ -143,7 +152,7 @@ enum cxl_get_feat_selection { */ struct cxl_mbox_set_feat_in { __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */, - uuid_t uuid; + __uapi_uuid_t uuid; __le32 flags; __le16 offset; __u8 version; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 746c43bd3eb6..2f24103f4533 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -147,7 +147,7 @@ struct drm_ivpu_param { * platform type when executing on a simulator or emulator (read-only) * * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: - * Current PLL frequency (read-only) + * Maximum frequency of the NPU data processing unit clock (read-only) * * %DRM_IVPU_PARAM_NUM_CONTEXTS: * Maximum number of simultaneously existing contexts (read-only) diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 690621b610e5..1bfb635e309b 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -49,7 +49,7 @@ enum blktrace_act { __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ __BLK_TA_INSERT, /* insert request */ __BLK_TA_SPLIT, /* bio was split */ - __BLK_TA_BOUNCE, /* bio was bounced */ + __BLK_TA_BOUNCE, /* unused, was: bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index fe24c3459ac0..30c8dad6214e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -31,11 +31,6 @@ enum ethtool_header_flags { ETHTOOL_FLAG_STATS = 4, }; -enum { - ETHTOOL_PHY_UPSTREAM_TYPE_MAC, - ETHTOOL_PHY_UPSTREAM_TYPE_PHY, -}; - enum ethtool_tcp_data_split { ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index d2ee625ea189..7e2744ec8933 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -63,7 +63,7 @@ #define FUTEX2_SIZE_U32 0x02 #define FUTEX2_SIZE_U64 0x03 #define FUTEX2_NUMA 0x04 - /* 0x08 */ +#define FUTEX2_MPOL 0x08 /* 0x10 */ /* 0x20 */ /* 0x40 */ @@ -75,6 +75,13 @@ #define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */ /* + * When FUTEX2_NUMA doubles the futex word, the second word is a node value. + * The special value -1 indicates no-node. This is the same value as + * NUMA_NO_NODE, except that value is not ABI, this is. + */ +#define FUTEX_NO_NODE (-1) + +/* * Max numbers of elements in a futex_waitv array */ #define FUTEX_WAITV_MAX 128 diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ed2beb4def3f..cfd17e382082 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -73,6 +73,7 @@ struct io_uring_sqe { __u32 futex_flags; __u32 install_fd_flags; __u32 nop_flags; + __u32 pipe_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -93,6 +94,10 @@ struct io_uring_sqe { __u16 addr_len; __u16 __pad3[1]; }; + struct { + __u8 write_stream; + __u8 __pad4[3]; + }; }; union { struct { @@ -283,6 +288,7 @@ enum io_uring_op { IORING_OP_EPOLL_WAIT, IORING_OP_READV_FIXED, IORING_OP_WRITEV_FIXED, + IORING_OP_PIPE, /* this goes last, obviously */ IORING_OP_LAST, @@ -988,12 +994,16 @@ struct io_uring_zcrx_offsets { __u64 __resv[2]; }; +enum io_uring_zcrx_area_flags { + IORING_ZCRX_AREA_DMABUF = 1, +}; + struct io_uring_zcrx_area_reg { __u64 addr; __u64 len; __u64 rq_area_token; __u32 flags; - __u32 __resv1; + __u32 dmabuf_fd; __u64 __resv2[2]; }; @@ -1010,7 +1020,9 @@ struct io_uring_zcrx_ifq_reg { __u64 region_ptr; /* struct io_uring_region_desc * */ struct io_uring_zcrx_offsets offsets; - __u64 __resv[4]; + __u32 zcrx_id; + __u32 __resv2; + __u64 __resv[3]; }; #ifdef __cplusplus diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d9d0cb827117..f030adc462ee 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -53,43 +53,70 @@ struct landlock_ruleset_attr { __u64 scoped; }; -/* - * sys_landlock_create_ruleset() flags: +/** + * DOC: landlock_create_ruleset_flags + * + * **Flags** * - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. + * %LANDLOCK_CREATE_RULESET_VERSION + * Get the highest supported Landlock ABI version (starting at 1). + * + * %LANDLOCK_CREATE_RULESET_ERRATA + * Get a bitmask of fixed issues for the current Landlock ABI version. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) #define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ -/* - * sys_landlock_restrict_self() flags: - * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF: Do not create any log related to the - * enforced restrictions. This should only be set by tools launching unknown - * or untrusted programs (e.g. a sandbox tool, container runtime, system - * service manager). Because programs sandboxing themselves should fix any - * denied access, they should not set this flag to be aware of potential - * issues reported by system's logs (i.e. audit). - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON: Explicitly ask to continue - * logging denied access requests even after an :manpage:`execve(2)` call. - * This flag should only be set if all the programs than can legitimately be - * executed will not try to request a denied access (which could spam audit - * logs). - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF: Do not create any log related - * to the enforced restrictions coming from future nested domains created by - * the caller or its descendants. This should only be set according to a - * runtime configuration (i.e. not hardcoded) by programs launching other - * unknown or untrusted programs that may create their own Landlock domains - * and spam logs. The main use case is for container runtimes to enable users - * to mute buggy sandboxed programs for a specific container image. Other use - * cases include sandboxer tools and init systems. Unlike - * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, - * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF does not impact the requested - * restriction (if any) but only the future nested domains. +/** + * DOC: landlock_restrict_self_flags + * + * **Flags** + * + * By default, denied accesses originating from programs that sandbox themselves + * are logged via the audit subsystem. Such events typically indicate unexpected + * behavior, such as bugs or exploitation attempts. However, to avoid excessive + * logging, access requests denied by a domain not created by the originating + * program are not logged by default. The rationale is that programs should know + * their own behavior, but not necessarily the behavior of other programs. This + * default configuration is suitable for most programs that sandbox themselves. + * For specific use cases, the following flags allow programs to modify this + * default logging behavior. + * + * The %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF and + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON flags apply to the newly created + * Landlock domain. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * Disables logging of denied accesses originating from the thread creating + * the Landlock domain, as well as its children, as long as they continue + * running the same executable code (i.e., without an intervening + * :manpage:`execve(2)` call). This is intended for programs that execute + * unknown code without invoking :manpage:`execve(2)`, such as script + * interpreters. Programs that only sandbox themselves should not set this + * flag, so users can be notified of unauthorized access attempts via system + * logs. + * + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * Enables logging of denied accesses after an :manpage:`execve(2)` call, + * providing visibility into unauthorized access attempts by newly executed + * programs within the created Landlock domain. This flag is recommended + * only when all potential executables in the domain are expected to comply + * with the access restrictions, as excessive audit log entries could make + * it more difficult to identify critical events. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * Disables logging of denied accesses originating from nested Landlock + * domains created by the caller or its descendants. This flag should be set + * according to runtime configuration, not hardcoded, to avoid suppressing + * important security events. It is useful for container runtimes or + * sandboxing tools that may launch programs which themselves create + * Landlock domains and could otherwise generate excessive logs. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a + * domain. */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 5fc753c23734..78a362b80027 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,24 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack - * Max number of entries of branch stack - * should be < hardware limit + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -396,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -451,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -474,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -510,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -537,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -559,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -584,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -622,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -650,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -723,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -739,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -813,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -824,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -859,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -874,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -884,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -894,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -905,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -916,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -927,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -939,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -950,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1005,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1035,10 +1045,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1070,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1079,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1167,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1245,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1438,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 2970ef44655a..c27a4e238e4b 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -12,7 +12,7 @@ #define PIDFD_THREAD O_EXCL #ifdef __KERNEL__ #include <linux/sched.h> -#define PIDFD_CLONE CLONE_PIDFD +#define PIDFD_STALE CLONE_PIDFD #endif /* Flags for pidfd_send_signal(). */ @@ -25,10 +25,24 @@ #define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ #define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ #define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */ +#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */ #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ /* + * Values for @coredump_mask in pidfd_info. + * Only valid if PIDFD_INFO_COREDUMP is set in @mask. + * + * Note, the @PIDFD_COREDUMP_ROOT flag indicates that the generated + * coredump should be treated as sensitive and access should only be + * granted to privileged users. + */ +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ + +/* * The concept of process and threads in userland and the kernel is a confusing * one - within the kernel every thread is a 'task' with its own individual PID, * however from userland's point of view threads are grouped by a single PID, @@ -92,6 +106,8 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; + __u32 coredump_mask; + __u32 __spare1; }; #define PIDFS_IOCTL_MAGIC 0xFF diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 15c18ef4eb11..43dec6eed559 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -364,4 +364,11 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 95762232e018..5929030d4e8b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 15 +#define TASKSTATS_VERSION 16 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -72,8 +72,6 @@ struct taskstats { */ __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; - __u64 cpu_delay_max; - __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,14 +80,10 @@ struct taskstats { */ __u64 blkio_count; __u64 blkio_delay_total; - __u64 blkio_delay_max; - __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; - __u64 swapin_delay_max; - __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -172,14 +166,11 @@ struct taskstats { /* Delay waiting for memory reclaim */ __u64 freepages_count; __u64 freepages_delay_total; - __u64 freepages_delay_max; - __u64 freepages_delay_min; + /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; - __u64 thrashing_delay_max; - __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -187,8 +178,6 @@ struct taskstats { /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; - __u64 compact_delay_max; - __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -210,15 +199,37 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; - __u64 wpcopy_delay_max; - __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; - __u64 irq_delay_max; - __u64 irq_delay_min; - /* v15: add Delay max */ + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; }; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 583b86681c93..56c7e3fc666f 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -51,6 +51,10 @@ _IOR('u', 0x13, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_DEL_DEV_ASYNC \ _IOR('u', 0x14, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_UPDATE_SIZE \ + _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_QUIESCE_DEV \ + _IOWR('u', 0x16, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of @@ -211,6 +215,63 @@ */ #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) +/* + * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE + * New size is passed in cmd->data[0] and is in units of sectors + */ +#define UBLK_F_UPDATE_SIZE (1ULL << 10) + +/* + * request buffer is registered automatically to uring_cmd's io_uring + * context before delivering this io command to ublk server, meantime + * it is un-registered automatically when completing this io command. + * + * For using this feature: + * + * - ublk server has to create sparse buffer table on the same `io_ring_ctx` + * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`. + * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's + * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF` + * manually, otherwise this ublk request won't complete. + * + * - ublk server passes auto buf register data via uring_cmd's sqe->addr, + * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see + * the definition of ublk_sqe_addr_to_auto_buf_reg() + * + * - pass buffer index from `ublk_auto_buf_reg.index` + * + * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed + * + * - pass flags from `ublk_auto_buf_reg.flags` if needed + * + * This way avoids extra cost from two uring_cmd, but also simplifies backend + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and + * IO_UNREGISTER_IO_BUF becomes not necessary. + * + * If wrong data or flags are provided, both IO_FETCH_REQ and + * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request + * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued + * successfully + */ +#define UBLK_F_AUTO_BUF_REG (1ULL << 11) + +/* + * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device, + * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or + * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for + * handling `UBLK_IO_RES_ABORT` correctly. + * + * Typical use case is for supporting to upgrade ublk server application, + * meantime keep ublk block device persistent during the period. + * + * This feature is only available when UBLK_F_USER_RECOVERY is enabled. + * + * Note, this command returns -EBUSY in case that all IO commands are being + * handled by ublk server and not completed in specified time period which + * is passed from the control command parameter. + */ +#define UBLK_F_QUIESCE (1ULL << 12) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -297,6 +358,17 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) +/* + * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only. + * + * This flag is set if auto buffer register is failed & ublk server passes + * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer + * manually for handling the delivered IO command if this flag is observed + * + * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is + * passed in. + */ +#define UBLK_IO_F_NEED_REG_BUF (1U << 17) /* * io cmd is described by this structure, and stored in share memory, indexed @@ -331,6 +403,62 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) return iod->op_flags >> 8; } +/* + * If this flag is set, fallback by completing the uring_cmd and setting + * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure; + * otherwise the client ublk request is failed silently + * + * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF + * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set, + * ublk server needs to register io buffer manually for handling IO command. + */ +#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0) +#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK + +struct ublk_auto_buf_reg { + /* index for registering the delivered request buffer */ + __u16 index; + __u8 flags; + __u8 reserved0; + + /* + * io_ring FD can be passed via the reserve field in future for + * supporting to register io buffer to external io_uring + */ + __u32 reserved1; +}; + +/* + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via + * uring_cmd's sqe->addr: + * + * - bit0 ~ bit15: buffer index + * - bit16 ~ bit23: flags + * - bit24 ~ bit31: reserved0 + * - bit32 ~ bit63: reserved1 + */ +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( + __u64 sqe_addr) +{ + struct ublk_auto_buf_reg reg = { + .index = sqe_addr & 0xffff, + .flags = (sqe_addr >> 16) & 0xff, + .reserved0 = (sqe_addr >> 24) & 0xff, + .reserved1 = sqe_addr >> 32, + }; + + return reg; +} + +static inline __u64 +ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf) +{ + __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 | + (__u64)buf->reserved1 << 32; + + return addr; +} + /* issued to ublk driver via /dev/ublkcN */ struct ublksrv_io_cmd { __u16 q_id; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b95dd84eef2d..d4b3e2ae1314 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 8549d4571257..c691ac210ce2 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 8a24ed59ec46..1c47136d8715 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -180,7 +180,8 @@ enum attr_idn { QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, - QUERY_ATTR_IDN_TIMESTAMP = 0x30 + QUERY_ATTR_IDN_TIMESTAMP = 0x30, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID = 0x34, }; /* Descriptor idn for Query requests */ @@ -390,6 +391,7 @@ enum { UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), + UFS_DEV_LVL_EXCEPTION_SUP = BIT(12), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 @@ -419,6 +421,7 @@ enum { MASK_EE_TOO_LOW_TEMP = BIT(4), MASK_EE_WRITEBOOSTER_EVENT = BIT(5), MASK_EE_PERFORMANCE_THROTTLING = BIT(6), + MASK_EE_DEV_LVL_EXCEPTION = BIT(7), MASK_EE_HEALTH_CRITICAL = BIT(9), }; #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db..f52de5ed1b3b 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */ diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2..e928ed0265ff 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -246,7 +246,7 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion complete; struct ufs_query query; }; @@ -968,6 +968,9 @@ enum ufshcd_mcq_opr { * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled * @critical_health_count: count of critical health exceptions + * @dev_lvl_exception_count: count of device level exceptions since last reset + * @dev_lvl_exception_id: vendor specific information about the + * device level exception event. */ struct ufs_hba { void __iomem *mmio_base; @@ -1138,6 +1141,8 @@ struct ufs_hba { bool pm_qos_enabled; int critical_health_count; + atomic_t dev_lvl_exception_count; + u64 dev_lvl_exception_id; }; /** diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h index eee3d2a4dbe4..ff0c06b6513e 100644 --- a/include/vdso/unaligned.h +++ b/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ |