diff options
89 files changed, 1688 insertions, 706 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6befa9855c1..61ab1628a057 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3787,6 +3787,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes command, uas only); + g = MAX_SECTORS_240 (don't transfer more than + 240 sectors at a time, uas only); h = CAPACITY_HEURISTICS (decrease the reported device capacity by one sector if the number is odd); diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt index 09c2382ad055..c72702ec1ded 100644 --- a/Documentation/module-signing.txt +++ b/Documentation/module-signing.txt @@ -119,9 +119,9 @@ Most notably, in the x509.genkey file, the req_distinguished_name section should be altered from the default: [ req_distinguished_name ] - O = Magrathea - CN = Glacier signing key - emailAddress = slartibartfast@magrathea.h2g2 + #O = Unspecified company + CN = Build time autogenerated kernel key + #emailAddress = unspecified.user@unspecified.company The generated RSA key size can also be set with: diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 639ddf0ece9b..9ed15f86c17c 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -18,3 +18,12 @@ platform_labels - INTEGER Possible values: 0 - 1048575 Default: 0 + +conf/<interface>/input - BOOL + Control whether packets can be input on this interface. + + If disabled, packets will be discarded without further + processing. + + 0 - disabled (default) + not 0 - enabled diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index cbfac0949635..59f4db2a0c85 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -282,7 +282,7 @@ following is true: - The current CPU's queue head counter >= the recorded tail counter value in rps_dev_flow[i] -- The current CPU is unset (equal to RPS_NO_CPU) +- The current CPU is unset (>= nr_cpu_ids) - The current CPU is offline After this check, the packet is sent to the (possibly updated) current diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8e58c614c37d..b06dc3839268 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,7 +115,7 @@ config S390 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES + select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index ba3b2aefddf5..d9c4c313fbc6 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -3,9 +3,10 @@ * * Support for s390 cryptographic instructions. * - * Copyright IBM Corp. 2003, 2007 + * Copyright IBM Corp. 2003, 2015 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) + * Harald Freudenberger (freude@de.ibm.com) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -28,15 +29,17 @@ #define CRYPT_S390_MSA 0x1 #define CRYPT_S390_MSA3 0x2 #define CRYPT_S390_MSA4 0x4 +#define CRYPT_S390_MSA5 0x8 /* s390 cryptographic operations */ enum crypt_s390_operations { - CRYPT_S390_KM = 0x0100, - CRYPT_S390_KMC = 0x0200, - CRYPT_S390_KIMD = 0x0300, - CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500, - CRYPT_S390_KMCTR = 0x0600 + CRYPT_S390_KM = 0x0100, + CRYPT_S390_KMC = 0x0200, + CRYPT_S390_KIMD = 0x0300, + CRYPT_S390_KLMD = 0x0400, + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600, + CRYPT_S390_PPNO = 0x0700 }; /* @@ -138,6 +141,16 @@ enum crypt_s390_kmac_func { KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 }; +/* + * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER + * OPERATION) instruction + */ +enum crypt_s390_ppno_func { + PPNO_QUERY = CRYPT_S390_PPNO | 0, + PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3, + PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83 +}; + /** * crypt_s390_km: * @func: the function code passed to KM; see crypt_s390_km_func @@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, int ret = -1; asm volatile( - "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), "+a" (__ctr) : "d" (__func), "a" (__param) : "cc", "memory"); @@ -354,6 +367,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, } /** + * crypt_s390_ppno: + * @func: the function code passed to PPNO; see crypt_s390_ppno_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @dest_len: size of destination memory area in bytes + * @seed: address of seed data + * @seed_len: size of seed data in bytes + * + * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of random + * bytes stored in dest buffer for generate function + */ +static inline int crypt_s390_ppno(long func, void *param, + u8 *dest, long dest_len, + const u8 *seed, long seed_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; /* param block (240 bytes) */ + register u8 *__dest asm("2") = dest; /* buf for recv random bytes */ + register long __dest_len asm("3") = dest_len; /* requested random bytes */ + register const u8 *__seed asm("4") = seed; /* buf with seed data */ + register long __seed_len asm("5") = seed_len; /* bytes in seed buf */ + int ret = -1; + + asm volatile ( + "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */ + "1: brc 1,0b\n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (ret), "+a"(__dest), "+d"(__dest_len) + : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len) + : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0; +} + +/** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general * @@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func, return 0; if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) return 0; + if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57)) + return 0; + switch (func & CRYPT_S390_OP_MASK) { case CRYPT_S390_KM: ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); @@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func, ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; case CRYPT_S390_KMCTR: - ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, - NULL); + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, + NULL, NULL, 0, NULL); + break; + case CRYPT_S390_PPNO: + ret = crypt_s390_ppno(PPNO_QUERY, &status, + NULL, 0, NULL, 0); break; default: return 0; @@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param) int ret = -1; asm volatile( - "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret) : "d" (__func), "a" (__param) : "cc", "memory"); return ret; } - #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 94a35a4c1b48..1f374b39a4ec 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,106 +1,529 @@ /* - * Copyright IBM Corp. 2006, 2007 + * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + * Harald Freudenberger <freude@de.ibm.com> * Driver for the s390 pseudo random number generator */ + +#define KMSG_COMPONENT "prng" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/fs.h> +#include <linux/fips.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/device.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/mutex.h> #include <linux/random.h> #include <linux/slab.h> #include <asm/debug.h> #include <asm/uaccess.h> +#include <asm/timex.h> #include "crypt_s390.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>"); +MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("s390 PRNG interface"); -static int prng_chunk_size = 256; -module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH); + +#define PRNG_MODE_AUTO 0 +#define PRNG_MODE_TDES 1 +#define PRNG_MODE_SHA512 2 + +static unsigned int prng_mode = PRNG_MODE_AUTO; +module_param_named(mode, prng_mode, int, 0); +MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512"); + + +#define PRNG_CHUNKSIZE_TDES_MIN 8 +#define PRNG_CHUNKSIZE_TDES_MAX (64*1024) +#define PRNG_CHUNKSIZE_SHA512_MIN 64 +#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024) + +static unsigned int prng_chunk_size = 256; +module_param_named(chunksize, prng_chunk_size, int, 0); MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes"); -static int prng_entropy_limit = 4096; -module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); -MODULE_PARM_DESC(prng_entropy_limit, - "PRNG add entropy after that much bytes were produced"); + +#define PRNG_RESEED_LIMIT_TDES 4096 +#define PRNG_RESEED_LIMIT_TDES_LOWER 4096 +#define PRNG_RESEED_LIMIT_SHA512 100000 +#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000 + +static unsigned int prng_reseed_limit; +module_param_named(reseed_limit, prng_reseed_limit, int, 0); +MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit"); + /* * Any one who considers arithmetical methods of producing random digits is, * of course, in a state of sin. -- John von Neumann */ -struct s390_prng_data { - unsigned long count; /* how many bytes were produced */ - char *buf; +static int prng_errorflag; + +#define PRNG_GEN_ENTROPY_FAILED 1 +#define PRNG_SELFTEST_FAILED 2 +#define PRNG_INSTANTIATE_FAILED 3 +#define PRNG_SEED_FAILED 4 +#define PRNG_RESEED_FAILED 5 +#define PRNG_GEN_FAILED 6 + +struct prng_ws_s { + u8 parm_block[32]; + u32 reseed_counter; + u64 byte_counter; }; -static struct s390_prng_data *p; +struct ppno_ws_s { + u32 res; + u32 reseed_counter; + u64 stream_bytes; + u8 V[112]; + u8 C[112]; +}; -/* copied from libica, use a non-zero initial parameter block */ -static unsigned char parm_block[32] = { -0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4, -0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0, +struct prng_data_s { + struct mutex mutex; + union { + struct prng_ws_s prngws; + struct ppno_ws_s ppnows; + }; + u8 *buf; + u32 rest; + u8 *prev; }; -static int prng_open(struct inode *inode, struct file *file) +static struct prng_data_s *prng_data; + +/* initial parameter block for tdes mode, copied from libica */ +static const u8 initial_parm_block[32] __initconst = { + 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52, + 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4, + 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF, + 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 }; + + +/*** helper functions ***/ + +static int generate_entropy(u8 *ebuf, size_t nbytes) { - return nonseekable_open(inode, file); + int n, ret = 0; + u8 *pg, *h, hash[32]; + + pg = (u8 *) __get_free_page(GFP_KERNEL); + if (!pg) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + return -ENOMEM; + } + + while (nbytes) { + /* fill page with urandom bytes */ + get_random_bytes(pg, PAGE_SIZE); + /* exor page with stckf values */ + for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { + u64 *p = ((u64 *)pg) + n; + *p ^= get_tod_clock_fast(); + } + n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash); + if (n < sizeof(hash)) + h = hash; + else + h = ebuf; + /* generate sha256 from this page */ + if (crypt_s390_kimd(KIMD_SHA_256, h, + pg, PAGE_SIZE) != PAGE_SIZE) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + ret = -EIO; + goto out; + } + if (n < sizeof(hash)) + memcpy(ebuf, hash, n); + ret += n; + ebuf += n; + nbytes -= n; + } + +out: + free_page((unsigned long)pg); + return ret; } -static void prng_add_entropy(void) + +/*** tdes functions ***/ + +static void prng_tdes_add_entropy(void) { __u64 entropy[4]; unsigned int i; int ret; for (i = 0; i < 16; i++) { - ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy, - (char *)entropy, sizeof(entropy)); + ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + (char *)entropy, (char *)entropy, + sizeof(entropy)); BUG_ON(ret < 0 || ret != sizeof(entropy)); - memcpy(parm_block, entropy, sizeof(entropy)); + memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy)); } } -static void prng_seed(int nbytes) + +static void prng_tdes_seed(int nbytes) { char buf[16]; int i = 0; - BUG_ON(nbytes > 16); + BUG_ON(nbytes > sizeof(buf)); + get_random_bytes(buf, nbytes); /* Add the entropy */ while (nbytes >= 8) { - *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); - prng_add_entropy(); + *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i)); + prng_tdes_add_entropy(); i += 8; nbytes -= 8; } - prng_add_entropy(); + prng_tdes_add_entropy(); + prng_data->prngws.reseed_counter = 0; +} + + +static int __init prng_tdes_instantiate(void) +{ + int datalen; + + pr_debug("prng runs in TDES mode with " + "chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + memcpy(prng_data->prngws.parm_block, initial_parm_block, 32); + + /* initialize the PRNG, add 128 bits of entropy */ + prng_tdes_seed(16); + + return 0; } -static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, - loff_t *ppos) + +static void prng_tdes_deinstantiate(void) +{ + pr_debug("The prng module stopped " + "after running in triple DES mode\n"); + kzfree(prng_data); +} + + +/*** sha512 functions ***/ + +static int __init prng_sha512_selftest(void) { - int chunk, n; + /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */ + static const u8 seed[] __initconst = { + 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a, + 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22, + 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b, + 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c, + 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa, + 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 }; + static const u8 V0[] __initconst = { + 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76, + 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22, + 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60, + 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1, + 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95, + 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b, + 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79, + 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03, + 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd, + 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36, + 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0, + 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e, + 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea, + 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 }; + static const u8 C0[] __initconst = { + 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95, + 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e, + 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94, + 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86, + 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50, + 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f, + 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9, + 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43, + 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee, + 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a, + 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9, + 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09, + 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc, + 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e }; + static const u8 random[] __initconst = { + 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57, + 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6, + 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d, + 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf, + 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26, + 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64, + 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55, + 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a, + 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78, + 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e, + 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb, + 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30, + 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19, + 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f, + 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d, + 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a, + 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2, + 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd, + 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd, + 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2, + 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b, + 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1, + 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99, + 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e, + 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3, + 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67, + 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3, + 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d, + 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b, + 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13, + 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c, + 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; + int ret = 0; - int tmp; + u8 buf[sizeof(random)]; + struct ppno_ws_s ws; + + memset(&ws, 0, sizeof(ws)); + + /* initial seed */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &ws, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + pr_err("The prng self test seed operation for the " + "SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check working states V and C */ + if (memcmp(ws.V, V0, sizeof(V0)) != 0 + || memcmp(ws.C, C0, sizeof(C0)) != 0) { + pr_err("The prng self test state test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* generate random bytes */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check against expected data */ + if (memcmp(buf, random, sizeof(random)) != 0) { + pr_err("The prng self test data test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + return 0; +} + + +static int __init prng_sha512_instantiate(void) +{ + int ret, datalen; + u8 seed[64]; + + pr_debug("prng runs in SHA-512 mode " + "with chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + if (fips_enabled) + datalen += prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + + /* selftest */ + ret = prng_sha512_selftest(); + if (ret) + goto outfree; + + /* generate initial seed bytestring, first 48 bytes of entropy */ + ret = generate_entropy(seed, 48); + if (ret != 48) + goto outfree; + /* followed by 16 bytes of unique nonce */ + get_tod_clock_ext(seed + 48); + + /* initial seed of the ppno drng */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + prng_errorflag = PRNG_SEED_FAILED; + ret = -EIO; + goto outfree; + } + + /* if fips mode is enabled, generate a first block of random + bytes for the FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + prng_data->prev = prng_data->buf + prng_chunk_size; + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, + prng_data->prev, + prng_chunk_size, + NULL, 0); + if (ret < 0 || ret != prng_chunk_size) { + prng_errorflag = PRNG_GEN_FAILED; + ret = -EIO; + goto outfree; + } + } + + return 0; + +outfree: + kfree(prng_data); + return ret; +} + + +static void prng_sha512_deinstantiate(void) +{ + pr_debug("The prng module stopped after running in SHA-512 mode\n"); + kzfree(prng_data); +} + + +static int prng_sha512_reseed(void) +{ + int ret; + u8 seed[32]; + + /* generate 32 bytes of fresh entropy */ + ret = generate_entropy(seed, sizeof(seed)); + if (ret != sizeof(seed)) + return ret; + + /* do a reseed of the ppno drng with this bytestring */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret) { + prng_errorflag = PRNG_RESEED_FAILED; + return -EIO; + } + + return 0; +} + + +static int prng_sha512_generate(u8 *buf, size_t nbytes) +{ + int ret; + + /* reseed needed ? */ + if (prng_data->ppnows.reseed_counter > prng_reseed_limit) { + ret = prng_sha512_reseed(); + if (ret) + return ret; + } + + /* PPNO generate */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, buf, nbytes, + NULL, 0); + if (ret < 0 || ret != nbytes) { + prng_errorflag = PRNG_GEN_FAILED; + return -EIO; + } + + /* FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + if (!memcmp(prng_data->prev, buf, nbytes)) { + prng_errorflag = PRNG_GEN_FAILED; + return -EILSEQ; + } + memcpy(prng_data->prev, buf, nbytes); + } + + return ret; +} + + +/*** file io functions ***/ + +static int prng_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + + +static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int chunk, n, tmp, ret = 0; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; - /* nbytes can be arbitrary length, we split it into chunks */ while (nbytes) { - /* same as in extract_entropy_user in random.c */ if (need_resched()) { if (signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; break; } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } } /* @@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, /* PRNG only likes multiples of 8 bytes */ n = (chunk + 7) & -8; - if (p->count > prng_entropy_limit) - prng_seed(8); + if (prng_data->prngws.reseed_counter > prng_reseed_limit) + prng_tdes_seed(8); /* if the CPU supports PRNG stckf is present too */ - asm volatile(".insn s,0xb27c0000,%0" - : "=m" (*((unsigned long long *)p->buf)) : : "cc"); + *((unsigned long long *)prng_data->buf) = get_tod_clock_fast(); /* * Beside the STCKF the input for the TDES-EDE is the output @@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, * Note: you can still get strict X9.17 conformity by setting * prng_chunk_size to 8 bytes. */ - tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n); - BUG_ON((tmp < 0) || (tmp != n)); + tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + prng_data->buf, prng_data->buf, n); + if (tmp < 0 || tmp != n) { + ret = -EIO; + break; + } - p->count += n; + prng_data->prngws.byte_counter += n; + prng_data->prngws.reseed_counter += n; - if (copy_to_user(ubuf, p->buf, chunk)) + if (copy_to_user(ubuf, prng_data->buf, chunk)) return -EFAULT; nbytes -= chunk; ret += chunk; ubuf += chunk; } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + return ret; } -static const struct file_operations prng_fops = { + +static ssize_t prng_sha512_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int n, ret = 0; + u8 *p; + + /* if errorflag is set do nothing and return 'broken pipe' */ + if (prng_errorflag) + return -EPIPE; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + + while (nbytes) { + if (need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); + schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } + } + if (prng_data->rest) { + /* push left over random bytes from the previous read */ + p = prng_data->buf + prng_chunk_size - prng_data->rest; + n = (nbytes < prng_data->rest) ? + nbytes : prng_data->rest; + prng_data->rest -= n; + } else { + /* generate one chunk of random bytes into read buf */ + p = prng_data->buf; + n = prng_sha512_generate(p, prng_chunk_size); + if (n < 0) { + ret = n; + break; + } + if (nbytes < prng_chunk_size) { + n = nbytes; + prng_data->rest = prng_chunk_size - n; + } else { + n = prng_chunk_size; + prng_data->rest = 0; + } + } + if (copy_to_user(ubuf, p, n)) { + ret = -EFAULT; + break; + } + ubuf += n; + nbytes -= n; + ret += n; + } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + + return ret; +} + + +/*** sysfs stuff ***/ + +static const struct file_operations prng_sha512_fops = { + .owner = THIS_MODULE, + .open = &prng_open, + .release = NULL, + .read = &prng_sha512_read, + .llseek = noop_llseek, +}; +static const struct file_operations prng_tdes_fops = { .owner = THIS_MODULE, .open = &prng_open, .release = NULL, - .read = &prng_read, + .read = &prng_tdes_read, .llseek = noop_llseek, }; -static struct miscdevice prng_dev = { +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .fops = &prng_sha512_fops, +}; +static struct miscdevice prng_tdes_dev = { .name = "prandom", .minor = MISC_DYNAMIC_MINOR, - .fops = &prng_fops, + .fops = &prng_tdes_fops, }; + +/* chunksize attribute (ro) */ +static ssize_t prng_chunksize_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); +} +static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); + +/* counter attribute (ro) */ +static ssize_t prng_counter_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 counter; + + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + if (prng_mode == PRNG_MODE_SHA512) + counter = prng_data->ppnows.stream_bytes; + else + counter = prng_data->prngws.byte_counter; + mutex_unlock(&prng_data->mutex); + + return snprintf(buf, PAGE_SIZE, "%llu\n", counter); +} +static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); + +/* errorflag attribute (ro) */ +static ssize_t prng_errorflag_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); +} +static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); + +/* mode attribute (ro) */ +static ssize_t prng_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (prng_mode == PRNG_MODE_TDES) + return snprintf(buf, PAGE_SIZE, "TDES\n"); + else + return snprintf(buf, PAGE_SIZE, "SHA512\n"); +} +static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); + +/* reseed attribute (w) */ +static ssize_t prng_reseed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + prng_sha512_reseed(); + mutex_unlock(&prng_data->mutex); + + return count; +} +static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store); + +/* reseed limit attribute (rw) */ +static ssize_t prng_reseed_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); +} +static ssize_t prng_reseed_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned limit; + + if (sscanf(buf, "%u\n", &limit) != 1) + return -EINVAL; + + if (prng_mode == PRNG_MODE_SHA512) { + if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + } else { + if (limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + } + + prng_reseed_limit = limit; + + return count; +} +static DEVICE_ATTR(reseed_limit, 0644, + prng_reseed_limit_show, prng_reseed_limit_store); + +/* strength attribute (ro) */ +static ssize_t prng_strength_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "256\n"); +} +static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); + +static struct attribute *prng_sha512_dev_attrs[] = { + &dev_attr_errorflag.attr, + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + &dev_attr_reseed.attr, + &dev_attr_reseed_limit.attr, + &dev_attr_strength.attr, + NULL +}; +static struct attribute *prng_tdes_dev_attrs[] = { + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + NULL +}; + +static struct attribute_group prng_sha512_dev_attr_group = { + .attrs = prng_sha512_dev_attrs +}; +static struct attribute_group prng_tdes_dev_attr_group = { + .attrs = prng_tdes_dev_attrs +}; + + +/*** module init and exit ***/ + static int __init prng_init(void) { int ret; @@ -169,43 +815,105 @@ static int __init prng_init(void) if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; - if (prng_chunk_size < 8) - return -EINVAL; + /* choose prng mode */ + if (prng_mode != PRNG_MODE_TDES) { + /* check for MSA5 support for PPNO operations */ + if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN, + CRYPT_S390_MSA5)) { + if (prng_mode == PRNG_MODE_SHA512) { + pr_err("The prng module cannot " + "start in SHA-512 mode\n"); + return -EOPNOTSUPP; + } + prng_mode = PRNG_MODE_TDES; + } else + prng_mode = PRNG_MODE_SHA512; + } - p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL); - if (!p) - return -ENOMEM; - p->count = 0; + if (prng_mode == PRNG_MODE_SHA512) { - p->buf = kmalloc(prng_chunk_size, GFP_KERNEL); - if (!p->buf) { - ret = -ENOMEM; - goto out_free; - } + /* SHA512 mode */ - /* initialize the PRNG, add 128 bits of entropy */ - prng_seed(16); + if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f; - ret = misc_register(&prng_dev); - if (ret) - goto out_buf; - return 0; + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + + ret = prng_sha512_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_sha512_dev); + if (ret) { + prng_sha512_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + if (ret) { + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + goto out; + } -out_buf: - kfree(p->buf); -out_free: - kfree(p); + } else { + + /* TDES mode */ + + if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07; + + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_TDES; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + + ret = prng_tdes_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_tdes_dev); + if (ret) { + prng_tdes_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + if (ret) { + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + goto out; + } + + } + +out: return ret; } + static void __exit prng_exit(void) { - /* wipe me */ - kzfree(p->buf); - kfree(p); - - misc_deregister(&prng_dev); + if (prng_mode == PRNG_MODE_SHA512) { + sysfs_remove_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + } else { + sysfs_remove_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + } } + module_init(prng_init); module_exit(prng_exit); diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 694bcd6bd927..2f924bc30e35 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -26,6 +26,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Allocate control page with GFP_DMA */ +#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA + /* Maximum address we can use for the crash control pages */ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index a5e656260a70..d29ad9545b41 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -14,7 +14,9 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* The mmu context has extended page tables. */ + /* The mmu context allocates 4K page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. */ unsigned int use_skey:1; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d25d9ff10ba8..fb1b93ea3e3f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.flush_mm = 0; mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; mm->context.asce_bits |= _ASCE_TYPE_REGION3; +#ifdef CONFIG_PGSTE + mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; +#endif mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 51e7fb634ebc..7b7858f158b4 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); +extern int page_table_allocate_pgste; int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 989cfae9e202..fc642399b489 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -12,12 +12,9 @@ #define _ASM_S390_PGTABLE_H /* - * The Linux memory management assumes a three-level page table setup. For - * s390 31 bit we "fold" the mid level into the top-level page table, so - * that we physically have the same two-level page table as the s390 mmu - * expects in 31 bit mode. For s390 64 bit we use three of the five levels - * the hardware provides (region first and region second tables are not - * used). + * The Linux memory management assumes a three-level page table setup. + * For s390 64 bit we use up to four of the five levels the hardware + * provides (region first tables are not used). * * The "pgd_xxx()" functions are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -101,8 +98,8 @@ extern unsigned long zero_page_mask; #ifndef __ASSEMBLY__ /* - * The vmalloc and module area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * The vmalloc and module area will always be on the topmost area of the + * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where * modules will reside. That makes sure that inter module branches always * happen without trampolines and in addition the placement within a 2GB frame @@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr) } /* - * A 31 bit pagetable entry of S390 has following format: - * | PFRA | | OS | - * 0 0IP0 - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Page-Invalid Bit: Page is not available for address-translation - * P Page-Protection Bit: Store access not possible for page - * - * A 31 bit segmenttable entry of S390 has following format: - * | P-table origin | |PTL - * 0 IC - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Segment-Invalid Bit: Segment is not available for address-translation - * C Common-Segment Bit: Segment is not private (PoP 3-30) - * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) - * - * The 31 bit segmenttable origin of S390 has following format: - * - * |S-table origin | | STL | - * X **GPS - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * X Space-Switch event: - * G Segment-Invalid Bit: * - * P Private-Space Bit: Segment is not private (PoP 3-30) - * S Storage-Alteration: - * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) - * * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 @@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr) /* Software bits in the page table entry */ #define _PAGE_PRESENT 0x001 /* SW pte present bit */ -#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_READ 0x010 /* SW pte read bit */ @@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr) * table lock held. * * The following table gives the different possible bit combinations for - * the pte hardware and software bits in the last 12 bits of a pte: + * the pte hardware and software bits in the last 12 bits of a pte + * (. unassigned bit, x don't care, t swap type): * * 842100000000 * 000084210000 * 000000008421 - * .IR...wrdytp - * empty .10...000000 - * swap .10...xxxx10 - * file .11...xxxxx0 - * prot-none, clean, old .11...000001 - * prot-none, clean, young .11...000101 - * prot-none, dirty, old .10...001001 - * prot-none, dirty, young .10...001101 - * read-only, clean, old .11...010001 - * read-only, clean, young .01...010101 - * read-only, dirty, old .11...011001 - * read-only, dirty, young .01...011101 - * read-write, clean, old .11...110001 - * read-write, clean, young .01...110101 - * read-write, dirty, old .10...111001 - * read-write, dirty, young .00...111101 + * .IR.uswrdy.p + * empty .10.00000000 + * swap .11..ttttt.0 + * prot-none, clean, old .11.xx0000.1 + * prot-none, clean, young .11.xx0001.1 + * prot-none, dirty, old .10.xx0010.1 + * prot-none, dirty, young .10.xx0011.1 + * read-only, clean, old .11.xx0100.1 + * read-only, clean, young .01.xx0101.1 + * read-only, dirty, old .11.xx0110.1 + * read-only, dirty, young .01.xx0111.1 + * read-write, clean, old .11.xx1100.1 + * read-write, clean, young .01.xx1101.1 + * read-write, dirty, old .10.xx1110.1 + * read-write, dirty, young .00.xx1111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large * - * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 - * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + * pte_none is true for the bit pattern .10.00000000, pte == 0x400 + * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200 + * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 */ /* Bits in the segment/region table address-space-control-element */ @@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr) * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) + * HW-bits: R read-only, I invalid + * SW-bits: y young, d dirty, r read, w write */ #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ @@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_alloc_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.alloc_pgste)) + return 1; +#endif + return 0; +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages @@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte) static inline int pte_swap(pte_t pte) { - /* Bit pattern: (pte & 0x603) == 0x402 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | - _PAGE_TYPE | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_TYPE); + /* Bit pattern: (pte & 0x201) == 0x200 */ + return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) + == _PAGE_PROTECT; } static inline int pte_special(pte_t pte) @@ -1586,51 +1563,51 @@ static inline int has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * 31 bit swap entry format: - * A page-table entry has some bits we have to treat in a special way. - * Bits 0, 20 and bit 23 have to be zero, otherwise an specification - * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary - * information in the lowcore. - * Bits 21, 22, 30 and 31 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 1-19 and bits 24-29 to store type and offset. - * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 - * plus 24 for the offset. - * 0| offset |0110|o|type |00| - * 0 0000000001111111111 2222 2 22222 33 - * 0 1234567890123456789 0123 4 56789 01 - * * 64 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. * Bits 52 and bit 55 have to be zero, otherwise an specification * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bits 53, 54, 62 and 63 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 0-51 and bits 56-61 to store type and offset. - * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 - * plus 56 for the offset. - * | offset |0110|o|type |00| - * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 - * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + * Bits 54 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 + * This leaves the bits 0-51 and bits 56-62 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 + * for the offset. + * | offset |01100|type |00| + * |0000000000111111111122222222223333333333444444444455|55555|55566|66| + * |0123456789012345678901234567890123456789012345678901|23456|78901|23| */ -#define __SWP_OFFSET_MASK (~0UL >> 11) +#define __SWP_OFFSET_MASK ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_TYPE_MASK ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT 2 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; - offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | - ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + + pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; + pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; return pte; } -#define __swp_type(entry) (((entry).val >> 2) & 0x1f) -#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) -#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) +static inline unsigned long __swp_type(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; +} + +static inline unsigned long __swp_offset(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; +} + +static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) +{ + return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; +} #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 210ffede0153..e617e74b7be2 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte) /* * Convert encoding pte bits pmd bits - * .IR...wrdytp dy..R...I...wr - * empty .10...000000 -> 00..0...1...00 - * prot-none, clean, old .11...000001 -> 00..1...1...00 - * prot-none, clean, young .11...000101 -> 01..1...1...00 - * prot-none, dirty, old .10...001001 -> 10..1...1...00 - * prot-none, dirty, young .10...001101 -> 11..1...1...00 - * read-only, clean, old .11...010001 -> 00..1...1...01 - * read-only, clean, young .01...010101 -> 01..1...0...01 - * read-only, dirty, old .11...011001 -> 10..1...1...01 - * read-only, dirty, young .01...011101 -> 11..1...0...01 - * read-write, clean, old .11...110001 -> 00..0...1...11 - * read-write, clean, young .01...110101 -> 01..0...0...11 - * read-write, dirty, old .10...111001 -> 10..0...1...11 - * read-write, dirty, young .00...111101 -> 11..0...0...11 + * lIR.uswrdy.p dy..R...I...wr + * empty 010.000000.0 -> 00..0...1...00 + * prot-none, clean, old 111.000000.1 -> 00..1...1...00 + * prot-none, clean, young 111.000001.1 -> 01..1...1...00 + * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 + * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 + * read-only, clean, old 111.000100.1 -> 00..1...1...01 + * read-only, clean, young 101.000101.1 -> 01..1...0...01 + * read-only, dirty, old 111.000110.1 -> 10..1...1...01 + * read-only, dirty, young 101.000111.1 -> 11..1...0...01 + * read-write, clean, old 111.001100.1 -> 00..1...1...11 + * read-write, clean, young 101.001101.1 -> 01..1...0...11 + * read-write, dirty, old 110.001110.1 -> 10..0...1...11 + * read-write, dirty, young 100.001111.1 -> 11..0...0...11 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; @@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) /* * Convert encoding pmd bits pte bits - * dy..R...I...wr .IR...wrdytp - * empty 00..0...1...00 -> .10...001100 - * prot-none, clean, old 00..0...1...00 -> .10...000001 - * prot-none, clean, young 01..0...1...00 -> .10...000101 - * prot-none, dirty, old 10..0...1...00 -> .10...001001 - * prot-none, dirty, young 11..0...1...00 -> .10...001101 - * read-only, clean, old 00..1...1...01 -> .11...010001 - * read-only, clean, young 01..1...1...01 -> .11...010101 - * read-only, dirty, old 10..1...1...01 -> .11...011001 - * read-only, dirty, young 11..1...1...01 -> .11...011101 - * read-write, clean, old 00..0...1...11 -> .10...110001 - * read-write, clean, young 01..0...1...11 -> .10...110101 - * read-write, dirty, old 10..0...1...11 -> .10...111001 - * read-write, dirty, young 11..0...1...11 -> .10...111101 + * dy..R...I...wr lIR.uswrdy.p + * empty 00..0...1...00 -> 010.000000.0 + * prot-none, clean, old 00..1...1...00 -> 111.000000.1 + * prot-none, clean, young 01..1...1...00 -> 111.000001.1 + * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 + * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 + * read-only, clean, old 00..1...1...01 -> 111.000100.1 + * read-only, clean, young 01..1...0...01 -> 101.000101.1 + * read-only, dirty, old 10..1...1...01 -> 111.000110.1 + * read-only, dirty, young 11..1...0...01 -> 101.000111.1 + * read-write, clean, old 00..1...1...11 -> 111.001100.1 + * read-write, clean, young 01..1...0...11 -> 101.001101.1 + * read-write, dirty, old 10..0...1...11 -> 110.001110.1 + * read-write, dirty, young 11..0...0...11 -> 100.001111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pmd_present(pmd)) { pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; @@ -70,8 +76,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; } else pte_val(pte) = _PAGE_INVALID; return pte; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 33f589459113..b33f66110ca9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,7 @@ #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/swapops.h> +#include <linux/sysctl.h> #include <linux/ksm.h> #include <linux/mman.h> @@ -920,6 +921,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) } EXPORT_SYMBOL(get_guest_storage_key); +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) @@ -963,7 +998,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) struct page *uninitialized_var(page); unsigned int mask, bit; - if (mm_has_pgste(mm)) + if (mm_alloc_pgste(mm)) return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); @@ -1165,116 +1200,25 @@ static inline void thp_split_mm(struct mm_struct *mm) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, - struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end) -{ - unsigned long next, *table, *new; - struct page *page; - spinlock_t *ptl; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); -again: - if (pmd_none_or_clear_bad(pmd)) - continue; - table = (unsigned long *) pmd_deref(*pmd); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) - continue; - /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm); - if (!new) - return -ENOMEM; - - ptl = pmd_lock(mm, pmd); - if (likely((unsigned long *) pmd_deref(*pmd) == table)) { - /* Nuke pmd entry pointing to the "short" page table */ - pmdp_flush_lazy(mm, addr, pmd); - pmd_clear(pmd); - /* Copy ptes from old table to new table */ - memcpy(new, table, PAGE_SIZE/2); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - /* Establish new table */ - pmd_populate(mm, pmd, (pte_t *) new); - /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table, addr); - new = NULL; - } - spin_unlock(ptl); - if (new) { - page_table_free_pgste(new); - goto again; - } - } while (pmd++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, - struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pud_t *pud; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - next = page_table_realloc_pmd(tlb, mm, pud, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pud++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pgd_t *pgd; - - pgd = pgd_offset(mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - next = page_table_realloc_pud(tlb, mm, pgd, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pgd++, addr = next, addr != end); - - return 0; -} - /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - struct mmu_gather tlb; + struct mm_struct *mm = current->mm; /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(tsk->mm)) + if (mm_has_pgste(mm)) return 0; - + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - /* Reallocate the page tables with pgstes */ - tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); - if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE)) - mm->context.has_pgste = 1; - tlb_finish_mmu(&tlb, 0, TASK_SIZE); up_write(&mm->mmap_sem); - return mm->context.has_pgste ? 0 : -ENOMEM; + return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6873f006f7d0..d366675e4bf8 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -774,7 +774,7 @@ static void __init zone_sizes_init(void) * though, there'll be no lowmem, so we just alloc_bootmem * the memmap. There will be no percpu memory either. */ - if (i != 0 && cpumask_test_cpu(i, &isolnodes)) { + if (i != 0 && node_isset(i, isolnodes)) { node_memmap_pfn[i] = alloc_bootmem_pfn(0, memmap_size, 0); BUG_ON(node_percpu[i] != 0); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c8a18e4ee9dc..720ceeb7fa9b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1298,21 +1298,22 @@ static int table_load(struct dm_ioctl *param, size_t param_size) goto err_unlock_md_type; } - if (dm_get_md_type(md) == DM_TYPE_NONE) + if (dm_get_md_type(md) == DM_TYPE_NONE) { /* Initial table load: acquire type of table. */ dm_set_md_type(md, dm_table_get_type(t)); - else if (dm_get_md_type(md) != dm_table_get_type(t)) { + + /* setup md->queue to reflect md's type (may block) */ + r = dm_setup_md_queue(md); + if (r) { + DMWARN("unable to set up device queue for new table."); + goto err_unlock_md_type; + } + } else if (dm_get_md_type(md) != dm_table_get_type(t)) { DMWARN("can't change device type after initial table load."); r = -EINVAL; goto err_unlock_md_type; } - /* setup md->queue to reflect md's type (may block) */ - r = dm_setup_md_queue(md); - if (r) { - DMWARN("unable to set up device queue for new table."); - goto err_unlock_md_type; - } dm_unlock_md_type(md); /* stage inactive table */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f8c7ca3e8947..a930b72314ac 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1082,18 +1082,26 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone) +static void free_rq_clone(struct request *clone, bool must_be_mapped) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; + WARN_ON_ONCE(must_be_mapped && !clone->q); + blk_rq_unprep_clone(clone); - if (clone->q->mq_ops) + if (md->type == DM_TYPE_MQ_REQUEST_BASED) + /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); else if (!md->queue->mq_ops) /* request_fn queue stacked on request_fn queue(s) */ free_clone_request(md, clone); + /* + * NOTE: for the blk-mq queue stacked on request_fn queue(s) case: + * no need to call free_clone_request() because we leverage blk-mq by + * allocating the clone at the end of the blk-mq pdu (see: clone_rq) + */ if (!md->queue->mq_ops) free_rq_tio(tio); @@ -1124,7 +1132,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone); + free_rq_clone(clone, true); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1143,7 +1151,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone); + free_rq_clone(clone, false); } /* @@ -2662,9 +2670,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) { struct request_queue *q = NULL; - if (md->queue->elevator) - return 0; - /* Fully initialize the queue */ q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); if (!q) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 78dde56ae6e6..3a10551d64cf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -82,6 +82,8 @@ #include <net/bond_3ad.h> #include <net/bond_alb.h> +#include "bonding_priv.h" + /*---------------------------- Module parameters ----------------------------*/ /* monitor all links that often (in milliseconds). <=0 disables monitoring */ diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 62694cfc05b6..b20b35acb47d 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -4,6 +4,7 @@ #include <net/netns/generic.h> #include <net/bonding.h> +#include "bonding_priv.h" static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) diff --git a/drivers/net/bonding/bonding_priv.h b/drivers/net/bonding/bonding_priv.h new file mode 100644 index 000000000000..5a4d81a9437c --- /dev/null +++ b/drivers/net/bonding/bonding_priv.h @@ -0,0 +1,25 @@ +/* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. + * + * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes + * NCM: Network and Communications Management, Inc. + * + * BUT, I'm the one who modified it for ethernet, so: + * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov + * + * This software may be used and distributed according to the terms + * of the GNU Public License, incorporated herein by reference. + * + */ + +#ifndef _BONDING_PRIV_H +#define _BONDING_PRIV_H + +#define DRV_VERSION "3.7.1" +#define DRV_RELDATE "April 27, 2011" +#define DRV_NAME "bonding" +#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" + +#define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" + +#endif diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 58808f651452..e8c96b8e86f4 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -112,7 +112,7 @@ config PCH_CAN config CAN_GRCAN tristate "Aeroflex Gaisler GRCAN and GRHCAN CAN devices" - depends on OF + depends on OF && HAS_DMA ---help--- Say Y here if you want to use Aeroflex Gaisler GRCAN or GRHCAN. Note that the driver supports little endian, even though little diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 4643914859b2..8b17a9065b0b 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1102,7 +1102,7 @@ static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv, if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME | MSG_FLAG_NERR)) { - netdev_err(priv->netdev, "Unknow error (flags: 0x%02x)\n", + netdev_err(priv->netdev, "Unknown error (flags: 0x%02x)\n", msg->u.rx_can_header.flag); stats->rx_errors++; diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c index b36ee9e0d220..d686b9cac29f 100644 --- a/drivers/net/ethernet/8390/etherh.c +++ b/drivers/net/ethernet/8390/etherh.c @@ -523,7 +523,7 @@ static int etherh_addr(char *addr, struct expansion_card *ec) char *s; if (!ecard_readchunk(&cd, ec, 0xf5, 0)) { - printk(KERN_ERR "%s: unable to read podule description string\n", + printk(KERN_ERR "%s: unable to read module description string\n", dev_name(&ec->dev)); goto no_addr; } diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h b/drivers/net/ethernet/altera/altera_msgdmahw.h index eba070f16782..89cd11d86642 100644 --- a/drivers/net/ethernet/altera/altera_msgdmahw.h +++ b/drivers/net/ethernet/altera/altera_msgdmahw.h @@ -58,15 +58,12 @@ struct msgdma_extended_desc { /* Tx buffer control flags */ #define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \ - MSGDMA_DESC_CTL_TR_ERR_IRQ | \ MSGDMA_DESC_CTL_GO) -#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \ - MSGDMA_DESC_CTL_GO) +#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_GO) #define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP | \ MSGDMA_DESC_CTL_TR_COMP_IRQ | \ - MSGDMA_DESC_CTL_TR_ERR_IRQ | \ MSGDMA_DESC_CTL_GO) #define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \ diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 90a76306ad0f..0533c051a3e5 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -777,6 +777,8 @@ static int init_phy(struct net_device *dev) struct altera_tse_private *priv = netdev_priv(dev); struct phy_device *phydev; struct device_node *phynode; + bool fixed_link = false; + int rc = 0; /* Avoid init phy in case of no phy present */ if (!priv->phy_iface) @@ -789,13 +791,32 @@ static int init_phy(struct net_device *dev) phynode = of_parse_phandle(priv->device->of_node, "phy-handle", 0); if (!phynode) { - netdev_dbg(dev, "no phy-handle found\n"); - if (!priv->mdio) { - netdev_err(dev, - "No phy-handle nor local mdio specified\n"); - return -ENODEV; + /* check if a fixed-link is defined in device-tree */ + if (of_phy_is_fixed_link(priv->device->of_node)) { + rc = of_phy_register_fixed_link(priv->device->of_node); + if (rc < 0) { + netdev_err(dev, "cannot register fixed PHY\n"); + return rc; + } + + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. + */ + phynode = of_node_get(priv->device->of_node); + fixed_link = true; + + netdev_dbg(dev, "fixed-link detected\n"); + phydev = of_phy_connect(dev, phynode, + &altera_tse_adjust_link, + 0, priv->phy_iface); + } else { + netdev_dbg(dev, "no phy-handle found\n"); + if (!priv->mdio) { + netdev_err(dev, "No phy-handle nor local mdio specified\n"); + return -ENODEV; + } + phydev = connect_local_phy(dev); } - phydev = connect_local_phy(dev); } else { netdev_dbg(dev, "phy-handle found\n"); phydev = of_phy_connect(dev, phynode, @@ -819,10 +840,10 @@ static int init_phy(struct net_device *dev) /* Broken HW is sometimes missing the pull-up resistor on the * MDIO line, which results in reads to non-existent devices returning * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent - * device as well. + * device as well. If a fixed-link is used the phy_id is always 0. * Note: phydev->phy_id is the result of reading the UID PHY registers. */ - if (phydev->phy_id == 0) { + if ((phydev->phy_id == 0) && !fixed_link) { netdev_err(dev, "Bad PHY UID 0x%08x\n", phydev->phy_id); phy_disconnect(phydev); return -ENODEV; diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index c638c85f3954..089c269637b7 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -179,7 +179,7 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on (OF_NET || ACPI) && HAS_IOMEM + depends on (OF_NET || ACPI) && HAS_IOMEM && HAS_DMA select PHYLIB select AMD_XGBE_PHY select BITREVERSE diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 8e262e2b39b6..dea29ee24da4 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -25,8 +25,7 @@ config ARC_EMAC_CORE config ARC_EMAC tristate "ARC EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ - depends on OF_NET + depends on OF_IRQ && OF_NET && HAS_DMA ---help--- On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x non-standard on-chip ethernet device ARC EMAC 10/100 is used. @@ -35,7 +34,7 @@ config ARC_EMAC config EMAC_ROCKCHIP tristate "Rockchip EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ && OF_NET && REGULATOR + depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA ---help--- Support for Rockchip RK3066/RK3188 EMAC ethernet controllers. This selects Rockchip SoC glue layer support for the diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index de77d3a74abc..21e3c38c7c75 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1260,7 +1260,7 @@ static int bgmac_poll(struct napi_struct *napi, int weight) /* Poll again if more events arrived in the meantime */ if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX)) - return handled; + return weight; if (handled < weight) { napi_complete(napi); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 2f63467bce46..3558a36b1c2d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2485,8 +2485,10 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) else if (bp->flags & GRO_ENABLE_FLAG) fp->mode = TPA_MODE_GRO; - /* We don't want TPA on an FCoE L2 ring */ - if (IS_FCOE_FP(fp)) + /* We don't want TPA if it's disabled in bp + * or if this is an FCoE L2 ring. + */ + if (bp->disable_tpa || IS_FCOE_FP(fp)) fp->disable_tpa = 1; } @@ -4809,6 +4811,23 @@ netdev_features_t bnx2x_fix_features(struct net_device *dev, { struct bnx2x *bp = netdev_priv(dev); + if (pci_num_vf(bp->pdev)) { + netdev_features_t changed = dev->features ^ features; + + /* Revert the requested changes in features if they + * would require internal reload of PF in bnx2x_set_features(). + */ + if (!(features & NETIF_F_RXCSUM) && !bp->disable_tpa) { + features &= ~NETIF_F_RXCSUM; + features |= dev->features & NETIF_F_RXCSUM; + } + + if (changed & NETIF_F_LOOPBACK) { + features &= ~NETIF_F_LOOPBACK; + features |= dev->features & NETIF_F_LOOPBACK; + } + } + /* TPA requires Rx CSUM offloading */ if (!(features & NETIF_F_RXCSUM)) { features &= ~NETIF_F_LRO; @@ -4839,15 +4858,18 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) else flags &= ~GRO_ENABLE_FLAG; - if (features & NETIF_F_LOOPBACK) { - if (bp->link_params.loopback_mode != LOOPBACK_BMAC) { - bp->link_params.loopback_mode = LOOPBACK_BMAC; - bnx2x_reload = true; - } - } else { - if (bp->link_params.loopback_mode != LOOPBACK_NONE) { - bp->link_params.loopback_mode = LOOPBACK_NONE; - bnx2x_reload = true; + /* VFs or non SRIOV PFs should be able to change loopback feature */ + if (!pci_num_vf(bp->pdev)) { + if (features & NETIF_F_LOOPBACK) { + if (bp->link_params.loopback_mode != LOOPBACK_BMAC) { + bp->link_params.loopback_mode = LOOPBACK_BMAC; + bnx2x_reload = true; + } + } else { + if (bp->link_params.loopback_mode != LOOPBACK_NONE) { + bp->link_params.loopback_mode = LOOPBACK_NONE; + bnx2x_reload = true; + } } } @@ -4931,6 +4953,11 @@ int bnx2x_resume(struct pci_dev *pdev) } bp = netdev_priv(dev); + if (pci_num_vf(bp->pdev)) { + DP(BNX2X_MSG_IOV, "VFs are enabled, can not change MTU\n"); + return -EPERM; + } + if (bp->recovery_state != BNX2X_RECOVERY_DONE) { BNX2X_ERR("Handling parity error recovery. Try again later\n"); return -EAGAIN; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index e3d853cab7c9..48ed005ba73f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1843,6 +1843,12 @@ static int bnx2x_set_ringparam(struct net_device *dev, "set ring params command parameters: rx_pending = %d, tx_pending = %d\n", ering->rx_pending, ering->tx_pending); + if (pci_num_vf(bp->pdev)) { + DP(BNX2X_MSG_IOV, + "VFs are enabled, can not change ring parameters\n"); + return -EPERM; + } + if (bp->recovery_state != BNX2X_RECOVERY_DONE) { DP(BNX2X_MSG_ETHTOOL, "Handling parity error recovery. Try again later\n"); @@ -2899,6 +2905,12 @@ static void bnx2x_self_test(struct net_device *dev, u8 is_serdes, link_up; int rc, cnt = 0; + if (pci_num_vf(bp->pdev)) { + DP(BNX2X_MSG_IOV, + "VFs are enabled, can not perform self test\n"); + return; + } + if (bp->recovery_state != BNX2X_RECOVERY_DONE) { netdev_err(bp->dev, "Handling parity error recovery. Try again later\n"); @@ -3468,6 +3480,11 @@ static int bnx2x_set_channels(struct net_device *dev, channels->rx_count, channels->tx_count, channels->other_count, channels->combined_count); + if (pci_num_vf(bp->pdev)) { + DP(BNX2X_MSG_IOV, "VFs are enabled, can not set channels\n"); + return -EPERM; + } + /* We don't support separate rx / tx channels. * We don't allow setting 'other' channels. */ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1270b189a9a2..069952fa5d64 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18129,7 +18129,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - tp->pcierr_recovery = true; + /* We needn't recover from permanent error */ + if (state == pci_channel_io_frozen) + tp->pcierr_recovery = true; /* We probably don't have netdev yet */ if (!netdev || !netif_running(netdev)) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 9f5387249f24..665c29098e3c 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1473,9 +1473,9 @@ static void macb_init_rings(struct macb *bp) for (i = 0; i < TX_RING_SIZE; i++) { bp->queues[0].tx_ring[i].addr = 0; bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED); - bp->queues[0].tx_head = 0; - bp->queues[0].tx_tail = 0; } + bp->queues[0].tx_head = 0; + bp->queues[0].tx_tail = 0; bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); bp->rx_tail = 0; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 291c87036e17..2a0dc127df3f 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3347,7 +3347,7 @@ static int ehea_register_memory_hooks(void) { int ret = 0; - if (atomic_inc_and_test(&ehea_memory_hooks_registered)) + if (atomic_inc_return(&ehea_memory_hooks_registered) > 1) return 0; ret = ehea_create_busmap(); @@ -3381,12 +3381,14 @@ out3: out2: unregister_reboot_notifier(&ehea_reboot_nb); out: + atomic_dec(&ehea_memory_hooks_registered); return ret; } static void ehea_unregister_memory_hooks(void) { - if (atomic_read(&ehea_memory_hooks_registered)) + /* Only remove the hooks if we've registered them */ + if (atomic_read(&ehea_memory_hooks_registered) == 0) return; unregister_reboot_notifier(&ehea_reboot_nb); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index cd7675ac5bf9..18134766a114 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1238,7 +1238,7 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) - if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) break; if (i == IBMVETH_NUM_BUFF_POOLS) @@ -1257,7 +1257,7 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { adapter->rx_buff_pool[i].active = 1; - if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) { dev->mtu = new_mtu; vio_cmo_set_dev_desired(viodev, ibmveth_get_desired_dma diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index af829c578400..7ace07dad6a3 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1508,7 +1508,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!np) { dev_err(&pdev->dev, "missing phy-handle\n"); - return -EINVAL; + err = -EINVAL; + goto err_netdev; } of_property_read_u32(np, "reg", &pep->phy_addr); pep->phy_intf = of_get_phy_mode(pdev->dev.of_node); @@ -1526,7 +1527,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) pep->smi_bus = mdiobus_alloc(); if (pep->smi_bus == NULL) { err = -ENOMEM; - goto err_base; + goto err_netdev; } pep->smi_bus->priv = pep; pep->smi_bus->name = "pxa168_eth smi"; @@ -1551,13 +1552,10 @@ err_mdiobus: mdiobus_unregister(pep->smi_bus); err_free_mdio: mdiobus_free(pep->smi_bus); -err_base: - iounmap(pep->base); err_netdev: free_netdev(dev); err_clk: - clk_disable(clk); - clk_put(clk); + clk_disable_unprepare(clk); return err; } @@ -1574,13 +1572,9 @@ static int pxa168_eth_remove(struct platform_device *pdev) if (pep->phy) phy_disconnect(pep->phy); if (pep->clk) { - clk_disable(pep->clk); - clk_put(pep->clk); - pep->clk = NULL; + clk_disable_unprepare(pep->clk); } - iounmap(pep->base); - pep->base = NULL; mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); unregister_netdev(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3f44e2bbb982..a2ddf3d75ff8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1102,20 +1102,21 @@ static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc) struct mlx4_en_priv *priv = netdev_priv(dev); /* check if requested function is supported by the device */ - if ((hfunc == ETH_RSS_HASH_TOP && - !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) || - (hfunc == ETH_RSS_HASH_XOR && - !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))) - return -EINVAL; + if (hfunc == ETH_RSS_HASH_TOP) { + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) + return -EINVAL; + if (!(dev->features & NETIF_F_RXHASH)) + en_warn(priv, "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); + return 0; + } else if (hfunc == ETH_RSS_HASH_XOR) { + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR)) + return -EINVAL; + if (dev->features & NETIF_F_RXHASH) + en_warn(priv, "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); + return 0; + } - priv->rss_hash_fn = hfunc; - if (hfunc == ETH_RSS_HASH_TOP && !(dev->features & NETIF_F_RXHASH)) - en_warn(priv, - "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); - if (hfunc == ETH_RSS_HASH_XOR && (dev->features & NETIF_F_RXHASH)) - en_warn(priv, - "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); - return 0; + return -EINVAL; } static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, @@ -1189,6 +1190,8 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, priv->prof->rss_rings = rss_rings; if (key) memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc != ETH_RSS_HASH_NO_CHANGE) + priv->rss_hash_fn = hfunc; if (port_up) { err = mlx4_en_start_port(dev); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 1412f5af05ec..2bae50292dcd 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -69,11 +69,7 @@ #include <net/ip.h> #include <net/tcp.h> #include <asm/byteorder.h> -#include <asm/io.h> #include <asm/processor.h> -#ifdef CONFIG_MTRR -#include <asm/mtrr.h> -#endif #include <net/busy_poll.h> #include "myri10ge_mcp.h" @@ -242,8 +238,7 @@ struct myri10ge_priv { unsigned int rdma_tags_available; int intr_coal_delay; __be32 __iomem *intr_coal_delay_ptr; - int mtrr; - int wc_enabled; + int wc_cookie; int down_cnt; wait_queue_head_t down_wq; struct work_struct watchdog_work; @@ -1905,7 +1900,7 @@ static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = { "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", "tx_heartbeat_errors", "tx_window_errors", /* device-specific stats */ - "tx_boundary", "WC", "irq", "MSI", "MSIX", + "tx_boundary", "irq", "MSI", "MSIX", "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", "serial_number", "watchdog_resets", #ifdef CONFIG_MYRI10GE_DCA @@ -1984,7 +1979,6 @@ myri10ge_get_ethtool_stats(struct net_device *netdev, data[i] = ((u64 *)&link_stats)[i]; data[i++] = (unsigned int)mgp->tx_boundary; - data[i++] = (unsigned int)mgp->wc_enabled; data[i++] = (unsigned int)mgp->pdev->irq; data[i++] = (unsigned int)mgp->msi_enabled; data[i++] = (unsigned int)mgp->msix_enabled; @@ -4040,14 +4034,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mgp->board_span = pci_resource_len(pdev, 0); mgp->iomem_base = pci_resource_start(pdev, 0); - mgp->mtrr = -1; - mgp->wc_enabled = 0; -#ifdef CONFIG_MTRR - mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span, - MTRR_TYPE_WRCOMB, 1); - if (mgp->mtrr >= 0) - mgp->wc_enabled = 1; -#endif + mgp->wc_cookie = arch_phys_wc_add(mgp->iomem_base, mgp->board_span); mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span); if (mgp->sram == NULL) { dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n", @@ -4146,14 +4133,14 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto abort_with_state; } if (mgp->msix_enabled) - dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n", + dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, MTRR %s, WC Enabled\n", mgp->num_slices, mgp->tx_boundary, mgp->fw_name, - (mgp->wc_enabled ? "Enabled" : "Disabled")); + (mgp->wc_cookie > 0 ? "Enabled" : "Disabled")); else - dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n", + dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, MTRR %s, WC Enabled\n", mgp->msi_enabled ? "MSI" : "xPIC", pdev->irq, mgp->tx_boundary, mgp->fw_name, - (mgp->wc_enabled ? "Enabled" : "Disabled")); + (mgp->wc_cookie > 0 ? "Enabled" : "Disabled")); board_number++; return 0; @@ -4175,10 +4162,7 @@ abort_with_ioremap: iounmap(mgp->sram); abort_with_mtrr: -#ifdef CONFIG_MTRR - if (mgp->mtrr >= 0) - mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); -#endif + arch_phys_wc_del(mgp->wc_cookie); dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), mgp->cmd, mgp->cmd_bus); @@ -4220,11 +4204,7 @@ static void myri10ge_remove(struct pci_dev *pdev) pci_restore_state(pdev); iounmap(mgp->sram); - -#ifdef CONFIG_MTRR - if (mgp->mtrr >= 0) - mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); -#endif + arch_phys_wc_del(mgp->wc_cookie); myri10ge_free_slices(mgp); kfree(mgp->msix_vectors); dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 49ce7ece5af3..c9cb486c753d 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -80,7 +80,8 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpio_set_value(bitbang->mdo, 1 ^ bitbang->mdo_active_low); + gpio_set_value_cansleep(bitbang->mdo, + 1 ^ bitbang->mdo_active_low); return; } @@ -96,7 +97,8 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpio_get_value(bitbang->mdio) ^ bitbang->mdio_active_low; + return gpio_get_value_cansleep(bitbang->mdio) ^ + bitbang->mdio_active_low; } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -105,9 +107,11 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpio_set_value(bitbang->mdo, what ^ bitbang->mdo_active_low); + gpio_set_value_cansleep(bitbang->mdo, + what ^ bitbang->mdo_active_low); else - gpio_set_value(bitbang->mdio, what ^ bitbang->mdio_active_low); + gpio_set_value_cansleep(bitbang->mdio, + what ^ bitbang->mdio_active_low); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -115,7 +119,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpio_set_value(bitbang->mdc, what ^ bitbang->mdc_active_low); + gpio_set_value_cansleep(bitbang->mdc, what ^ bitbang->mdc_active_low); } static struct mdiobb_ops mdio_gpio_ops = { diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c index 1a87a585e74d..66edd99bc302 100644 --- a/drivers/net/phy/mdio-mux-gpio.c +++ b/drivers/net/phy/mdio-mux-gpio.c @@ -12,33 +12,30 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/mdio-mux.h> -#include <linux/of_gpio.h> +#include <linux/gpio/consumer.h> #define DRV_VERSION "1.1" #define DRV_DESCRIPTION "GPIO controlled MDIO bus multiplexer driver" -#define MDIO_MUX_GPIO_MAX_BITS 8 - struct mdio_mux_gpio_state { - struct gpio_desc *gpio[MDIO_MUX_GPIO_MAX_BITS]; - unsigned int num_gpios; + struct gpio_descs *gpios; void *mux_handle; }; static int mdio_mux_gpio_switch_fn(int current_child, int desired_child, void *data) { - int values[MDIO_MUX_GPIO_MAX_BITS]; - unsigned int n; struct mdio_mux_gpio_state *s = data; + int values[s->gpios->ndescs]; + unsigned int n; if (current_child == desired_child) return 0; - for (n = 0; n < s->num_gpios; n++) { + for (n = 0; n < s->gpios->ndescs; n++) values[n] = (desired_child >> n) & 1; - } - gpiod_set_array_cansleep(s->num_gpios, s->gpio, values); + + gpiod_set_array_cansleep(s->gpios->ndescs, s->gpios->desc, values); return 0; } @@ -46,56 +43,33 @@ static int mdio_mux_gpio_switch_fn(int current_child, int desired_child, static int mdio_mux_gpio_probe(struct platform_device *pdev) { struct mdio_mux_gpio_state *s; - int num_gpios; - unsigned int n; int r; - if (!pdev->dev.of_node) - return -ENODEV; - - num_gpios = of_gpio_count(pdev->dev.of_node); - if (num_gpios <= 0 || num_gpios > MDIO_MUX_GPIO_MAX_BITS) - return -ENODEV; - s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; - s->num_gpios = num_gpios; - - for (n = 0; n < num_gpios; ) { - struct gpio_desc *gpio = gpiod_get_index(&pdev->dev, NULL, n, - GPIOD_OUT_LOW); - if (IS_ERR(gpio)) { - r = PTR_ERR(gpio); - goto err; - } - s->gpio[n] = gpio; - n++; - } + s->gpios = gpiod_get_array(&pdev->dev, NULL, GPIOD_OUT_LOW); + if (IS_ERR(s->gpios)) + return PTR_ERR(s->gpios); r = mdio_mux_init(&pdev->dev, mdio_mux_gpio_switch_fn, &s->mux_handle, s); - if (r == 0) { - pdev->dev.platform_data = s; - return 0; - } -err: - while (n) { - n--; - gpiod_put(s->gpio[n]); + if (r != 0) { + gpiod_put_array(s->gpios); + return r; } - return r; + + pdev->dev.platform_data = s; + return 0; } static int mdio_mux_gpio_remove(struct platform_device *pdev) { - unsigned int n; struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev); mdio_mux_uninit(s->mux_handle); - for (n = 0; n < s->num_gpios; n++) - gpiod_put(s->gpio[n]); + gpiod_put_array(s->gpios); return 0; } diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index 911b21602ff2..05005c660d4d 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -478,7 +478,6 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, struct blkcipher_desc desc = { .tfm = state->arc4 }; unsigned ccount; int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED; - int sanity = 0; struct scatterlist sg_in[1], sg_out[1]; if (isize <= PPP_HDRLEN + MPPE_OVHD) { @@ -514,31 +513,19 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, "mppe_decompress[%d]: ENCRYPTED bit not set!\n", state->unit); state->sanity_errors += 100; - sanity = 1; + goto sanity_error; } if (!state->stateful && !flushed) { printk(KERN_DEBUG "mppe_decompress[%d]: FLUSHED bit not set in " "stateless mode!\n", state->unit); state->sanity_errors += 100; - sanity = 1; + goto sanity_error; } if (state->stateful && ((ccount & 0xff) == 0xff) && !flushed) { printk(KERN_DEBUG "mppe_decompress[%d]: FLUSHED bit not set on " "flag packet!\n", state->unit); state->sanity_errors += 100; - sanity = 1; - } - - if (sanity) { - if (state->sanity_errors < SANITY_MAX) - return DECOMP_ERROR; - else - /* - * Take LCP down if the peer is sending too many bogons. - * We don't want to do this for a single or just a few - * instances since it could just be due to packet corruption. - */ - return DECOMP_FATALERROR; + goto sanity_error; } /* @@ -546,6 +533,13 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, */ if (!state->stateful) { + /* Discard late packet */ + if ((ccount - state->ccount) % MPPE_CCOUNT_SPACE + > MPPE_CCOUNT_SPACE / 2) { + state->sanity_errors++; + goto sanity_error; + } + /* RFC 3078, sec 8.1. Rekey for every packet. */ while (state->ccount != ccount) { mppe_rekey(state, 0); @@ -649,6 +643,16 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, state->sanity_errors >>= 1; return osize; + +sanity_error: + if (state->sanity_errors < SANITY_MAX) + return DECOMP_ERROR; + else + /* Take LCP down if the peer is sending too many bogons. + * We don't want to do this for a single or just a few + * instances since it could just be due to packet corruption. + */ + return DECOMP_FATALERROR; } /* diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 154116aafd0d..27a5f954f8e9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -730,12 +730,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, /* Only change unicasts */ if (!(is_multicast_ether_addr(f->eth_addr) || is_zero_ether_addr(f->eth_addr))) { - int rc = vxlan_fdb_replace(f, ip, port, vni, + notify |= vxlan_fdb_replace(f, ip, port, vni, ifindex); - - if (rc < 0) - return rc; - notify |= rc; } else return -EOPNOTSUPP; } diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index c43aca69fb30..0fc3fe5fd5b8 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -667,6 +667,8 @@ static struct raw3215_info *raw3215_alloc_info(void) info->buffer = kzalloc(RAW3215_BUFFER_SIZE, GFP_KERNEL | GFP_DMA); info->inbuf = kzalloc(RAW3215_INBUF_SIZE, GFP_KERNEL | GFP_DMA); if (!info->buffer || !info->inbuf) { + kfree(info->inbuf); + kfree(info->buffer); kfree(info); return NULL; } diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index cd4c293f0dd0..fe8875f0d7be 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -80,9 +80,10 @@ static int __init sh_pm_runtime_init(void) if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) { if (!of_machine_is_compatible("renesas,emev2") && !of_machine_is_compatible("renesas,r7s72100") && - !of_machine_is_compatible("renesas,r8a73a4") && #ifndef CONFIG_PM_GENERIC_DOMAINS_OF + !of_machine_is_compatible("renesas,r8a73a4") && !of_machine_is_compatible("renesas,r8a7740") && + !of_machine_is_compatible("renesas,sh73a0") && #endif !of_machine_is_compatible("renesas,r8a7778") && !of_machine_is_compatible("renesas,r8a7779") && @@ -90,9 +91,7 @@ static int __init sh_pm_runtime_init(void) !of_machine_is_compatible("renesas,r8a7791") && !of_machine_is_compatible("renesas,r8a7792") && !of_machine_is_compatible("renesas,r8a7793") && - !of_machine_is_compatible("renesas,r8a7794") && - !of_machine_is_compatible("renesas,sh7372") && - !of_machine_is_compatible("renesas,sh73a0")) + !of_machine_is_compatible("renesas,r8a7794")) return 0; } diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 08da4d3e2162..46bcebba54b2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1998,6 +1998,8 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250 #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470 +#define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -2520,6 +2522,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V8358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, /* * Xircom cards */ @@ -2999,6 +3008,7 @@ enum pci_board_num_t { pbn_exar_XR17V352, pbn_exar_XR17V354, pbn_exar_XR17V358, + pbn_exar_XR17V8358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, pbn_ni8430_2, @@ -3685,6 +3695,14 @@ static struct pciserial_board pci_boards[] = { .reg_shift = 0, .first_offset = 0, }, + [pbn_exar_XR17V8358] = { + .flags = FL_BASE0, + .num_ports = 16, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_ibm_saturn] = { .flags = FL_BASE0, .num_ports = 1, @@ -5080,7 +5098,7 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_exar_XR17C158 }, /* - * Exar Corp. XR17V35[248] Dual/Quad/Octal PCIe UARTs + * Exar Corp. XR17V[48]35[248] Dual/Quad/Octal/Hexa PCIe UARTs */ { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V352, PCI_ANY_ID, PCI_ANY_ID, @@ -5094,7 +5112,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V358 }, - + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V8358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V8358 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d58fe4763d9e..27dade29646b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -880,6 +880,7 @@ static int atmel_prepare_tx_dma(struct uart_port *port) config.direction = DMA_MEM_TO_DEV; config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; config.dst_addr = port->mapbase + ATMEL_US_THR; + config.dst_maxburst = 1; ret = dmaengine_slave_config(atmel_port->chan_tx, &config); @@ -1059,6 +1060,7 @@ static int atmel_prepare_rx_dma(struct uart_port *port) config.direction = DMA_DEV_TO_MEM; config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; config.src_addr = port->mapbase + ATMEL_US_RHR; + config.src_maxburst = 1; ret = dmaengine_slave_config(atmel_port->chan_rx, &config); diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 5b73afb9f9f3..137381e649e5 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -346,7 +346,6 @@ static const struct of_device_id of_platform_serial_table[] = { { .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, #endif - { .type = "serial", .data = (void *)PORT_UNKNOWN, }, { /* end of list */ }, }; diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index cf08876922f1..a0ae942d9562 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1068,8 +1068,9 @@ static int s3c64xx_serial_startup(struct uart_port *port) spin_lock_irqsave(&port->lock, flags); ufcon = rd_regl(port, S3C2410_UFCON); - ufcon |= S3C2410_UFCON_RESETRX | S3C2410_UFCON_RESETTX | - S5PV210_UFCON_RXTRIG8; + ufcon |= S3C2410_UFCON_RESETRX | S5PV210_UFCON_RXTRIG8; + if (!uart_console(port)) + ufcon |= S3C2410_UFCON_RESETTX; wr_regl(port, S3C2410_UFCON, ufcon); enable_rx_pio(ourport); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index eb5b03be9dfd..0b7bb12dfc68 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1770,7 +1770,7 @@ static const struct file_operations uart_proc_fops = { * @port: the port to write the message * @s: array of characters * @count: number of characters in string to write - * @write: function to write character to port + * @putchar: function to write character to port */ void uart_console_write(struct uart_port *port, const char *s, unsigned int count, diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 708eead850b0..b1c6bd3d483f 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -632,7 +632,8 @@ MODULE_DEVICE_TABLE(of, ulite_of_match); static int ulite_probe(struct platform_device *pdev) { - struct resource *res, *res2; + struct resource *res; + int irq; int id = pdev->id; #ifdef CONFIG_OF const __be32 *prop; @@ -646,11 +647,11 @@ static int ulite_probe(struct platform_device *pdev) if (!res) return -ENODEV; - res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res2) - return -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return -ENXIO; - return ulite_assign(&pdev->dev, id, res->start, res2->start); + return ulite_assign(&pdev->dev, id, res->start, irq); } static int ulite_remove(struct platform_device *pdev) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index f218ec658f5d..3ddbac767db3 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1331,9 +1331,9 @@ static SIMPLE_DEV_PM_OPS(cdns_uart_dev_pm_ops, cdns_uart_suspend, */ static int cdns_uart_probe(struct platform_device *pdev) { - int rc, id; + int rc, id, irq; struct uart_port *port; - struct resource *res, *res2; + struct resource *res; struct cdns_uart *cdns_uart_data; cdns_uart_data = devm_kzalloc(&pdev->dev, sizeof(*cdns_uart_data), @@ -1380,9 +1380,9 @@ static int cdns_uart_probe(struct platform_device *pdev) goto err_out_clk_disable; } - res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res2) { - rc = -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + rc = -ENXIO; goto err_out_clk_disable; } @@ -1411,7 +1411,7 @@ static int cdns_uart_probe(struct platform_device *pdev) * and triggers invocation of the config_port() entry point. */ port->mapbase = res->start; - port->irq = res2->start; + port->irq = irq; port->dev = &pdev->dev; port->uartclk = clk_get_rate(cdns_uart_data->uartclk); port->private_data = cdns_uart_data; diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 632fc8152061..8e53fe469664 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -536,7 +536,7 @@ EXPORT_SYMBOL(tty_termios_hw_change); * Locking: termios_rwsem */ -static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) +int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; @@ -569,6 +569,7 @@ static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) up_write(&tty->termios_rwsem); return 0; } +EXPORT_SYMBOL_GPL(tty_set_termios); /** * set_termios - set termios values for a tty diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c index 083acf45ad5a..19d655a743b5 100644 --- a/drivers/usb/chipidea/otg_fsm.c +++ b/drivers/usb/chipidea/otg_fsm.c @@ -520,7 +520,6 @@ static int ci_otg_start_host(struct otg_fsm *fsm, int on) { struct ci_hdrc *ci = container_of(fsm, struct ci_hdrc, fsm); - mutex_unlock(&fsm->lock); if (on) { ci_role_stop(ci); ci_role_start(ci, CI_ROLE_HOST); @@ -529,7 +528,6 @@ static int ci_otg_start_host(struct otg_fsm *fsm, int on) hw_device_reset(ci); ci_role_start(ci, CI_ROLE_GADGET); } - mutex_lock(&fsm->lock); return 0; } @@ -537,12 +535,10 @@ static int ci_otg_start_gadget(struct otg_fsm *fsm, int on) { struct ci_hdrc *ci = container_of(fsm, struct ci_hdrc, fsm); - mutex_unlock(&fsm->lock); if (on) usb_gadget_vbus_connect(&ci->gadget); else usb_gadget_vbus_disconnect(&ci->gadget); - mutex_lock(&fsm->lock); return 0; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 3e15add665e2..5c8f58114677 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1142,11 +1142,16 @@ static int acm_probe(struct usb_interface *intf, } while (buflen > 0) { + elength = buffer[0]; + if (!elength) { + dev_err(&intf->dev, "skipping garbage byte\n"); + elength = 1; + goto next_desc; + } if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; } - elength = buffer[0]; switch (buffer[2]) { case USB_CDC_UNION_TYPE: /* we've found it */ diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index 9db74ca7e5b9..275c92e53a59 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -88,13 +88,20 @@ static int ehci_msm_probe(struct platform_device *pdev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hcd->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(hcd->regs)) { - ret = PTR_ERR(hcd->regs); + if (!res) { + dev_err(&pdev->dev, "Unable to get memory resource\n"); + ret = -ENODEV; goto put_hcd; } + hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); + hcd->regs = devm_ioremap(&pdev->dev, hcd->rsrc_start, hcd->rsrc_len); + if (!hcd->regs) { + dev_err(&pdev->dev, "ioremap failed\n"); + ret = -ENOMEM; + goto put_hcd; + } /* * OTG driver takes care of PHY initialization, clock management, diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index 9893d696fc97..f58caa9e6a27 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -51,7 +51,8 @@ static int uas_find_endpoints(struct usb_host_interface *alt, } static int uas_use_uas_driver(struct usb_interface *intf, - const struct usb_device_id *id) + const struct usb_device_id *id, + unsigned long *flags_ret) { struct usb_host_endpoint *eps[4] = { }; struct usb_device *udev = interface_to_usbdev(intf); @@ -73,7 +74,7 @@ static int uas_use_uas_driver(struct usb_interface *intf, * this writing the following versions exist: * ASM1051 - no uas support version * ASM1051 - with broken (*) uas support - * ASM1053 - with working uas support + * ASM1053 - with working uas support, but problems with large xfers * ASM1153 - with working uas support * * Devices with these chips re-use a number of device-ids over the @@ -103,6 +104,9 @@ static int uas_use_uas_driver(struct usb_interface *intf, } else if (usb_ss_max_streams(&eps[1]->ss_ep_comp) == 32) { /* Possibly an ASM1051, disable uas */ flags |= US_FL_IGNORE_UAS; + } else { + /* ASM1053, these have issues with large transfers */ + flags |= US_FL_MAX_SECTORS_240; } } @@ -132,5 +136,8 @@ static int uas_use_uas_driver(struct usb_interface *intf, return 0; } + if (flags_ret) + *flags_ret = flags; + return 1; } diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 6cdabdc119a7..6d3122afeed3 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -759,7 +759,10 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) static int uas_slave_alloc(struct scsi_device *sdev) { - sdev->hostdata = (void *)sdev->host->hostdata; + struct uas_dev_info *devinfo = + (struct uas_dev_info *)sdev->host->hostdata; + + sdev->hostdata = devinfo; /* USB has unusual DMA-alignment requirements: Although the * starting address of each scatter-gather element doesn't matter, @@ -778,6 +781,11 @@ static int uas_slave_alloc(struct scsi_device *sdev) */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); + if (devinfo->flags & US_FL_MAX_SECTORS_64) + blk_queue_max_hw_sectors(sdev->request_queue, 64); + else if (devinfo->flags & US_FL_MAX_SECTORS_240) + blk_queue_max_hw_sectors(sdev->request_queue, 240); + return 0; } @@ -887,8 +895,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) struct Scsi_Host *shost = NULL; struct uas_dev_info *devinfo; struct usb_device *udev = interface_to_usbdev(intf); + unsigned long dev_flags; - if (!uas_use_uas_driver(intf, id)) + if (!uas_use_uas_driver(intf, id, &dev_flags)) return -ENODEV; if (uas_switch_interface(udev, intf)) @@ -910,8 +919,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) devinfo->udev = udev; devinfo->resetting = 0; devinfo->shutdown = 0; - devinfo->flags = id->driver_info; - usb_stor_adjust_quirks(udev, &devinfo->flags); + devinfo->flags = dev_flags; init_usb_anchor(&devinfo->cmd_urbs); init_usb_anchor(&devinfo->sense_urbs); init_usb_anchor(&devinfo->data_urbs); diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 5600c33fcadb..6c10c888f35f 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -479,7 +479,8 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | - US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES); + US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | + US_FL_MAX_SECTORS_240); p = quirks; while (*p) { @@ -520,6 +521,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'f': f |= US_FL_NO_REPORT_OPCODES; break; + case 'g': + f |= US_FL_MAX_SECTORS_240; + break; case 'h': f |= US_FL_CAPACITY_HEURISTICS; break; @@ -1080,7 +1084,7 @@ static int storage_probe(struct usb_interface *intf, /* If uas is enabled and this device can do uas then ignore it. */ #if IS_ENABLED(CONFIG_USB_UAS) - if (uas_use_uas_driver(intf, id)) + if (uas_use_uas_driver(intf, id, NULL)) return -ENXIO; #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e60a745ac198..e804306ef5e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -40,6 +40,10 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif +#ifndef KEXEC_CONTROL_MEMORY_GFP +#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#endif + #ifndef KEXEC_CONTROL_PAGE_SIZE #error KEXEC_CONTROL_PAGE_SIZE not defined #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcbde799ec69..dbad4d728b4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,7 @@ struct phy_device; struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; +struct mpls_dev; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1627,6 +1628,9 @@ struct net_device { void *ax25_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; +#if IS_ENABLED(CONFIG_MPLS_ROUTING) + struct mpls_dev __rcu *mpls_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) @@ -2021,10 +2025,10 @@ struct pcpu_sw_netstats { ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ if (pcpu_stats) { \ - int i; \ - for_each_possible_cpu(i) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, i); \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e23d242d1230..dbcbcc59aa92 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -282,7 +282,8 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { - return atomic_read(&ht->nelems) > tbl->size; + return atomic_read(&ht->nelems) > tbl->size && + (!ht->p.max_size || tbl->size < ht->p.max_size); } /* The bucket lock is selected based on the hash and protects mutations diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 06793b598f44..66e374d62f64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -773,6 +773,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); +struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) diff --git a/include/linux/tty.h b/include/linux/tty.h index 358a337af598..fe5623c9af71 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -491,6 +491,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); +extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a7f2604c5f25..7f5f78bd15ad 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -77,6 +77,8 @@ /* Cannot handle ATA_12 or ATA_16 CDBs */ \ US_FLAG(NO_REPORT_OPCODES, 0x04000000) \ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ + US_FLAG(MAX_SECTORS_240, 0x08000000) \ + /* Sets max_sectors to 240 */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; diff --git a/include/net/bonding.h b/include/net/bonding.h index fda6feeb6c1f..78ed135e9dea 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -30,13 +30,6 @@ #include <net/bond_alb.h> #include <net/bond_options.h> -#define DRV_VERSION "3.7.1" -#define DRV_RELDATE "April 27, 2011" -#define DRV_NAME "bonding" -#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" - -#define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" - #define BOND_MAX_ARP_TARGETS 16 #define BOND_DEFAULT_MIIMON 100 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7b5887cd1172..48a815823587 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -279,12 +279,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); -static inline void inet_csk_reqsk_queue_removed(struct sock *sk, - struct request_sock *req) -{ - reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); -} - static inline void inet_csk_reqsk_queue_added(struct sock *sk, const unsigned long timeout) { @@ -306,19 +300,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } -static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, - struct request_sock *req) -{ - reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req); -} - -static inline void inet_csk_reqsk_queue_drop(struct sock *sk, - struct request_sock *req) -{ - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); - reqsk_put(req); -} +void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index fe41f3ceb008..9f4265ce8892 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -212,24 +212,6 @@ static inline int reqsk_queue_empty(struct request_sock_queue *queue) return queue->rskq_accept_head == NULL; } -static inline void reqsk_queue_unlink(struct request_sock_queue *queue, - struct request_sock *req) -{ - struct listen_sock *lopt = queue->listen_opt; - struct request_sock **prev; - - spin_lock(&queue->syn_wait_lock); - - prev = &lopt->syn_table[req->rsk_hash]; - while (*prev != req) - prev = &(*prev)->dl_next; - *prev = req->dl_next; - - spin_unlock(&queue->syn_wait_lock); - if (del_timer(&req->rsk_timer)) - reqsk_put(req); -} - static inline void reqsk_queue_add(struct request_sock_queue *queue, struct request_sock *req, struct sock *parent, diff --git a/kernel/Makefile b/kernel/Makefile index 0f8f8b0bc1bf..60c302cfb4d3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -197,9 +197,9 @@ x509.genkey: @echo >>x509.genkey "x509_extensions = myexts" @echo >>x509.genkey @echo >>x509.genkey "[ req_distinguished_name ]" - @echo >>x509.genkey "O = Magrathea" - @echo >>x509.genkey "CN = Glacier signing key" - @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2" + @echo >>x509.genkey "#O = Unspecified company" + @echo >>x509.genkey "CN = Build time autogenerated kernel key" + @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company" @echo >>x509.genkey @echo >>x509.genkey "[ myexts ]" @echo >>x509.genkey "basicConstraints=critical,CA:FALSE" diff --git a/kernel/kexec.c b/kernel/kexec.c index 38c25b1f2fd5..7a36fdcca5bf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, do { unsigned long pfn, epfn, addr, eaddr; - pages = kimage_alloc_pages(GFP_KERNEL, order); + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; pfn = page_to_pfn(pages); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4898442b837f..b28df4019ade 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -405,13 +405,18 @@ int rhashtable_insert_rehash(struct rhashtable *ht) if (rht_grow_above_75(ht, tbl)) size *= 2; - /* More than two rehashes (not resizes) detected. */ - else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + /* Do not schedule more than one rehash */ + else if (old_tbl != tbl) return -EBUSY; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); - if (new_tbl == NULL) + if (new_tbl == NULL) { + /* Schedule async resize/rehash to try allocation + * non-atomic context. + */ + schedule_work(&ht->run_work); return -ENOMEM; + } err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { diff --git a/net/core/dev.c b/net/core/dev.c index 1796cef55ab5..c7ba0388f1be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3079,7 +3079,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - if (next_cpu != RPS_NO_CPU) { + if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: - * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. @@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * have been dequeued, thus preserving in order delivery. */ if (unlikely(tcpu != next_cpu) && - (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } - if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; @@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - int cpu; + unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = ACCESS_ONCE(rflow->cpu); - if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d1967dab9cc6..3cfff2a3d651 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -280,13 +280,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { @@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp = gfp_mask; if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); nc->frag.size = PAGE_SIZE << (page ? order : 0); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2b4f21d34df6..ccf4c5629b3c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) iph->saddr, iph->daddr); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9d0551092c6c..5165571f397a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &iph->daddr, inet6_iif(skb)); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5f566663e47f..30addee2dd03 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: return child; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5c3dd6267ed3..8976ca423a07 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +/* return true if req was found in the syn_table[] */ +static bool reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct listen_sock *lopt = queue->listen_opt; + struct request_sock **prev; + bool found = false; + + spin_lock(&queue->syn_wait_lock); + + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } + } + + spin_unlock(&queue->syn_wait_lock); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + return found; +} + +void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + reqsk_put(req); + } +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); + static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3571f2be4470..fc1c658ec6c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d6311b5365..e5d7649136fc 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); - + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); + /* Warning: caller must not call reqsk_put(req); + * child stole last reference on it. + */ return child; listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8d7e06b72f..a369e8a70b2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2812,39 +2812,65 @@ begin_fwd: } } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. + * Otherwise tcp_send_fin() could be tempted to either delay FIN + * or even be forced to close flow without any FIN. + */ +static void sk_forced_wmem_schedule(struct sock *sk, int size) +{ + int amt, status; + + if (size <= sk->sk_forward_alloc) + return; + amt = sk_mem_pages(size); + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk_memory_allocated_add(sk, amt, &status); +} + +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk)) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b5e6cc1d4a73..a38d3ac0f18f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tunnel_stats; - ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tunnel_stats->syncp); - } - return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad51df85aa00..b6575d665568 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index db8a2ea6d4de..954810c76a86 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } +static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->mpls_ptr); +} + static bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_route *rt; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; unsigned int mtu; @@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ + mdev = mpls_dev_get(dev); + if (!mdev || !mdev->input_enabled) + goto drop; + if (skb->pkt_type != PACKET_HOST) goto drop; @@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!dev) goto errout; - /* For now just support ethernet devices */ + /* Ensure this is a supported device */ err = -EINVAL; - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + if (!mpls_dev_get(dev)) goto errout; err = -EINVAL; @@ -428,10 +438,89 @@ errout: return err; } +#define MPLS_PERDEV_SYSCTL_OFFSET(field) \ + (&((struct mpls_dev *)0)->field) + +static const struct ctl_table mpls_dev_table[] = { + { + .procname = "input", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), + }, + { } +}; + +static int mpls_dev_sysctl_register(struct net_device *dev, + struct mpls_dev *mdev) +{ + char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + struct ctl_table *table; + int i; + + table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); + if (!table) + goto out; + + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. + */ + for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) + table[i].data = (char *)mdev + (uintptr_t)table[i].data; + + snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); + + mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + if (!mdev->sysctl) + goto free; + + return 0; + +free: + kfree(table); +out: + return -ENOBUFS; +} + +static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +{ + struct ctl_table *table; + + table = mdev->sysctl->ctl_table_arg; + unregister_net_sysctl_table(mdev->sysctl); + kfree(table); +} + +static struct mpls_dev *mpls_add_dev(struct net_device *dev) +{ + struct mpls_dev *mdev; + int err = -ENOMEM; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(err); + + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + goto free; + + rcu_assign_pointer(dev->mpls_ptr, mdev); + + return mdev; + +free: + kfree(mdev); + return ERR_PTR(err); +} + static void mpls_ifdown(struct net_device *dev) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev) continue; rt->rt_dev = NULL; } + + mdev = mpls_dev_get(dev); + if (!mdev) + return; + + mpls_dev_sysctl_unregister(mdev); + + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + + kfree(mdev); } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mpls_dev *mdev; switch(event) { + case NETDEV_REGISTER: + /* For now just support ethernet devices */ + if ((dev->type == ARPHRD_ETHER) || + (dev->type == ARPHRD_LOOPBACK)) { + mdev = mpls_add_dev(dev); + if (IS_ERR(mdev)) + return notifier_from_errno(PTR_ERR(mdev)); + } + break; + case NETDEV_UNREGISTER: mpls_ifdown(dev); break; @@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla, if ((dec.bos != bos) || dec.ttl || dec.tc) return -EINVAL; + switch (dec.label) { + case LABEL_IMPLICIT_NULL: + /* RFC3032: This is a label that an LSR may + * assign and distribute, but which never + * actually appears in the encapsulation. + */ + return -EINVAL; + } + label[i] = dec.label; } *labels = nla_labels; @@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } -static struct ctl_table mpls_table[] = { +static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", .data = NULL, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index fb6de92052c4..693877d69606 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -22,6 +22,12 @@ struct mpls_entry_decoded { u8 bos; }; +struct mpls_dev { + int input_enabled; + + struct ctl_table_header *sysctl; +}; + struct sk_buff; static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 57d3e1af5630..0522fc9bfb0a 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 62cabee42fbe..635dbba93d01 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb, if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19909d0786a2..ec4adbdcb9b4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, if (data == NULL) return NULL; - skb = build_skb(data, size); + skb = __build_skb(data, size); if (skb == NULL) vfree(data); - else { - skb->head_frag = 0; + else skb->destructor = netlink_skb_destructor; - } return skb; } diff --git a/net/tipc/link.c b/net/tipc/link.c index a6b30df6ec02..57be6e6aff99 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2143,7 +2143,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); - tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/server.c b/net/tipc/server.c index ab6183cdb121..77ff03ed1e18 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - sk_release_kernel(sk); + sock_release(sock); con->sock = NULL; } @@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); + ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); if (ret < 0) return NULL; - - sk_change_net(sock->sk, s->net); - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) create_err: kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ee90d74d7516..9074b5cede38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { u32 dnode, dport = 0; - int err = -TIPC_ERR_NO_PORT; + int err; struct sk_buff *skb; struct tipc_sock *tsk; struct tipc_net *tn; struct sock *sk; while (skb_queue_len(inputq)) { + err = -TIPC_ERR_NO_PORT; skb = NULL; dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 99f7012b23b9..a73a226f2d33 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; - struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = file_inode(filp); - /* - * Socket ? - */ + /* Socket ? */ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; - /* - * PF_UNIX ? - */ + /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; } return u_sock; } -/* - * Keep the number of times in flight count for the file - * descriptor if it is for an AF_UNIX socket. +/* Keep the number of times in flight count for the file + * descriptor if it is for an AF_UNIX socket. */ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -142,10 +139,13 @@ void unix_inflight(struct file *fp) void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; @@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { - /* - * Do we have file descriptors ? - */ + /* Do we have file descriptors ? */ if (UNIXCB(skb).fp) { bool hit = false; - /* - * Process the descriptors of this socket - */ + /* Process the descriptors of this socket */ int nfd = UNIXCB(skb).fp->count; struct file **fp = UNIXCB(skb).fp->fp; + while (nfd--) { - /* - * Get the socket the fd matches - * if it indeed does so - */ + /* Get the socket the fd matches if it indeed does so */ struct sock *sk = unix_get_socket(*fp++); + if (sk) { struct unix_sock *u = unix_sk(sk); - /* - * Ignore non-candidates, they could + /* Ignore non-candidates, they could * have been added to the queues after * starting the garbage collection */ if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; + func(u); } } @@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), static void scan_children(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { - if (x->sk_state != TCP_LISTEN) + if (x->sk_state != TCP_LISTEN) { scan_inflight(x, func, hitlist); - else { + } else { struct sk_buff *skb; struct sk_buff *next; struct unix_sock *u; LIST_HEAD(embryos); - /* - * For a listening socket collect the queued embryos + /* For a listening socket collect the queued embryos * and perform a scan on them as well. */ spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { u = unix_sk(skb->sk); - /* - * An embryo cannot be in-flight, so it's safe + /* An embryo cannot be in-flight, so it's safe * to use the list link. */ BUG_ON(!list_empty(&u->link)); @@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk) static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); - /* - * If this still might be part of a cycle, move it to the end + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over */ @@ -263,8 +255,7 @@ static bool gc_in_progress; void wait_for_unix_gc(void) { - /* - * If number of inflight sockets is insane, + /* If number of inflight sockets is insane, * force a garbage collect right now. */ if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) @@ -288,8 +279,7 @@ void unix_gc(void) goto out; gc_in_progress = true; - /* - * First, select candidates for garbage collection. Only + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. * @@ -320,15 +310,13 @@ void unix_gc(void) } } - /* - * Now remove all internal in-flight reference to children of + /* Now remove all internal in-flight reference to children of * the candidates. */ list_for_each_entry(u, &gc_candidates, link) scan_children(&u->sk, dec_inflight, NULL); - /* - * Restore the references for children of all candidates, + /* Restore the references for children of all candidates, * which have remaining references. Do this recursively, so * only those remain, which form cyclic references. * @@ -350,8 +338,7 @@ void unix_gc(void) } list_del(&cursor); - /* - * not_cycle_list contains those sockets which do not make up a + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ while (!list_empty(¬_cycle_list)) { @@ -360,8 +347,7 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* - * Now gc_candidates contains only garbage. Restore original + /* Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs * which are creating the cycle(s). */ |