diff options
130 files changed, 1168 insertions, 712 deletions
diff --git a/Documentation/Changes b/Documentation/Changes index 6d8863004858..f447f0516f07 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -43,7 +43,7 @@ o udev 081 # udevd --version o grub 0.93 # grub --version || grub-install --version o mcelog 0.6 # mcelog --version o iptables 1.4.2 # iptables -V -o openssl & libcrypto 1.0.1k # openssl version +o openssl & libcrypto 1.0.0 # openssl version Kernel compilation diff --git a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt index 391717a68f3b..ec96b1f01478 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt @@ -4,8 +4,8 @@ The MISC interrupt controller is a secondary controller for lower priority interrupt. Required Properties: -- compatible: has to be "qca,<soctype>-cpu-intc", "qca,ar7100-misc-intc" - as fallback +- compatible: has to be "qca,<soctype>-cpu-intc", "qca,ar7100-misc-intc" or + "qca,<soctype>-cpu-intc", "qca,ar7240-misc-intc" - reg: Base address and size of the controllers memory area - interrupt-parent: phandle of the parent interrupt controller. - interrupts: Interrupt specifier for the controllers interrupt. @@ -13,6 +13,9 @@ Required Properties: - #interrupt-cells : Specifies the number of cells needed to encode interrupt source, should be 1 +Compatible fallback depends on the SoC. Use ar7100 for ar71xx and ar913x, +use ar7240 for all other SoCs. + Please refer to interrupts.txt in this directory for details of the common Interrupt Controllers bindings used by client devices. @@ -28,3 +31,16 @@ Example: interrupt-controller; #interrupt-cells = <1>; }; + +Another example: + + interrupt-controller@18060010 { + compatible = "qca,ar9331-misc-intc", qca,ar7240-misc-intc"; + reg = <0x18060010 0x4>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + interrupt-controller; + #interrupt-cells = <1>; + }; diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 62328d76b55b..b0e911e0e8f5 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -979,20 +979,45 @@ every time right after the runtime_resume() callback has returned (alternatively, the runtime_suspend() callback will have to check if the device should really be suspended and return -EAGAIN if that is not the case). -The runtime PM of PCI devices is disabled by default. It is also blocked by -pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI -driver implements the runtime PM callbacks and intends to use the runtime PM -framework provided by the PM core and the PCI subsystem, it should enable this -feature by executing the pm_runtime_enable() helper function. However, the -driver should not call the pm_runtime_allow() helper function unblocking -the runtime PM of the device. Instead, it should allow user space or some -platform-specific code to do that (user space can do it via sysfs), although -once it has called pm_runtime_enable(), it must be prepared to handle the +The runtime PM of PCI devices is enabled by default by the PCI core. PCI +device drivers do not need to enable it and should not attempt to do so. +However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid() +helper function. In addition to that, the runtime PM usage counter of +each PCI device is incremented by local_pci_probe() before executing the +probe callback provided by the device's driver. + +If a PCI driver implements the runtime PM callbacks and intends to use the +runtime PM framework provided by the PM core and the PCI subsystem, it needs +to decrement the device's runtime PM usage counter in its probe callback +function. If it doesn't do that, the counter will always be different from +zero for the device and it will never be runtime-suspended. The simplest +way to do that is by calling pm_runtime_put_noidle(), but if the driver +wants to schedule an autosuspend right away, for example, it may call +pm_runtime_put_autosuspend() instead for this purpose. Generally, it +just needs to call a function that decrements the devices usage counter +from its probe routine to make runtime PM work for the device. + +It is important to remember that the driver's runtime_suspend() callback +may be executed right after the usage counter has been decremented, because +user space may already have cuased the pm_runtime_allow() helper function +unblocking the runtime PM of the device to run via sysfs, so the driver must +be prepared to cope with that. + +The driver itself should not call pm_runtime_allow(), though. Instead, it +should let user space or some platform-specific code do that (user space can +do it via sysfs as stated above), but it must be prepared to handle the runtime PM of the device correctly as soon as pm_runtime_allow() is called -(which may happen at any time). [It also is possible that user space causes -pm_runtime_allow() to be called via sysfs before the driver is loaded, so in -fact the driver has to be prepared to handle the runtime PM of the device as -soon as it calls pm_runtime_enable().] +(which may happen at any time, even before the driver is loaded). + +When the driver's remove callback runs, it has to balance the decrementation +of the device's runtime PM usage counter at the probe time. For this reason, +if it has decremented the counter in its probe callback, it must run +pm_runtime_get_noresume() in its remove callback. [Since the core carries +out a runtime resume of the device and bumps up the device's usage counter +before running the driver's remove callback, the runtime PM of the device +is effectively disabled for the duration of the remove execution and all +runtime PM helper functions incrementing the device's usage counter are +then effectively equivalent to pm_runtime_get_noresume().] The runtime PM framework works by processing requests to suspend or resume devices, or to check if they are idle (in which cases it is reasonable to diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c index 2bc8abc57fa0..6c6247aaa7b9 100644 --- a/Documentation/ptp/testptp.c +++ b/Documentation/ptp/testptp.c @@ -18,6 +18,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include <errno.h> #include <fcntl.h> #include <inttypes.h> diff --git a/MAINTAINERS b/MAINTAINERS index 9f6685f6c5a9..797236befd27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5957,7 +5957,7 @@ F: virt/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V M: Joerg Roedel <joro@8bytes.org> L: kvm@vger.kernel.org -W: http://kvm.qumranet.com +W: http://www.linux-kvm.org/ S: Maintained F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c @@ -5965,7 +5965,7 @@ F: arch/x86/kvm/svm.c KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC M: Alexander Graf <agraf@suse.com> L: kvm-ppc@vger.kernel.org -W: http://kvm.qumranet.com +W: http://www.linux-kvm.org/ T: git git://github.com/agraf/linux-2.6.git S: Supported F: arch/powerpc/include/asm/kvm* diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 15ecb4831e12..eeb3953ed8ac 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -293,8 +293,26 @@ static int __init ath79_misc_intc_of_init( return 0; } -IRQCHIP_DECLARE(ath79_misc_intc, "qca,ar7100-misc-intc", - ath79_misc_intc_of_init); + +static int __init ar7100_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", + ar7100_misc_intc_of_init); + +static int __init ar7240_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", + ar7240_misc_intc_of_init); static int __init ar79_cpu_intc_of_init( struct device_node *node, struct device_node *parent) diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 9801ac982655..fe67f12ac239 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -20,6 +20,9 @@ #ifndef cpu_has_tlb #define cpu_has_tlb (cpu_data[0].options & MIPS_CPU_TLB) #endif +#ifndef cpu_has_ftlb +#define cpu_has_ftlb (cpu_data[0].options & MIPS_CPU_FTLB) +#endif #ifndef cpu_has_tlbinv #define cpu_has_tlbinv (cpu_data[0].options & MIPS_CPU_TLBINV) #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index cd89e9855775..82ad15f11049 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -385,6 +385,7 @@ enum cpu_type_enum { #define MIPS_CPU_CDMM 0x4000000000ull /* CPU has Common Device Memory Map */ #define MIPS_CPU_BP_GHIST 0x8000000000ull /* R12K+ Branch Prediction Global History */ #define MIPS_CPU_SP 0x10000000000ull /* Small (1KB) page support */ +#define MIPS_CPU_FTLB 0x20000000000ull /* CPU has Fixed-page-size TLB */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index b02891f9caaf..21d9607c80d7 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -66,6 +66,15 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, } /** + * maar_init() - initialise MAARs + * + * Performs initialisation of MAARs for the current CPU, making use of the + * platforms implementation of platform_maar_init where necessary and + * duplicating the setup it provides on secondary CPUs. + */ +extern void maar_init(void); + +/** * struct maar_config - MAAR configuration data * @lower: The lowest address that the MAAR pair will affect. Must be * aligned to a 2^16 byte boundary. diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index d75b75e78ebb..1f1927ab4269 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -194,6 +194,7 @@ BUILD_CM_RW(reg3_mask, MIPS_CM_GCB_OFS + 0xc8) BUILD_CM_R_(gic_status, MIPS_CM_GCB_OFS + 0xd0) BUILD_CM_R_(cpc_status, MIPS_CM_GCB_OFS + 0xf0) BUILD_CM_RW(l2_config, MIPS_CM_GCB_OFS + 0x130) +BUILD_CM_RW(sys_config2, MIPS_CM_GCB_OFS + 0x150) /* Core Local & Core Other register accessor functions */ BUILD_CM_Cx_RW(reset_release, 0x00) @@ -316,6 +317,10 @@ BUILD_CM_Cx_R_(tcid_8_priority, 0x80) #define CM_GCR_L2_CONFIG_ASSOC_SHF 0 #define CM_GCR_L2_CONFIG_ASSOC_MSK (_ULCAST_(0xff) << 0) +/* GCR_SYS_CONFIG2 register fields */ +#define CM_GCR_SYS_CONFIG2_MAXVPW_SHF 0 +#define CM_GCR_SYS_CONFIG2_MAXVPW_MSK (_ULCAST_(0xf) << 0) + /* GCR_Cx_COHERENCE register fields */ #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_SHF 0 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK (_ULCAST_(0xff) << 0) @@ -405,4 +410,38 @@ static inline int mips_cm_revision(void) return read_gcr_rev(); } +/** + * mips_cm_max_vp_width() - return the width in bits of VP indices + * + * Return: the width, in bits, of VP indices in fields that combine core & VP + * indices. + */ +static inline unsigned int mips_cm_max_vp_width(void) +{ + extern int smp_num_siblings; + + if (mips_cm_revision() >= CM_REV_CM3) + return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + + return smp_num_siblings; +} + +/** + * mips_cm_vp_id() - calculate the hardware VP ID for a CPU + * @cpu: the CPU whose VP ID to calculate + * + * Hardware such as the GIC uses identifiers for VPs which may not match the + * CPU numbers used by Linux. This function calculates the hardware VP + * identifier corresponding to a given CPU. + * + * Return: the VP ID for the CPU. + */ +static inline unsigned int mips_cm_vp_id(unsigned int cpu) +{ + unsigned int core = cpu_data[cpu].core; + unsigned int vp = cpu_vpe_id(&cpu_data[cpu]); + + return (core * mips_cm_max_vp_width()) + vp; +} + #endif /* __MIPS_ASM_MIPS_CM_H__ */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index d3cd8eac81e3..c64781cf649f 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -487,6 +487,8 @@ /* Bits specific to the MIPS32/64 PRA. */ #define MIPS_CONF_MT (_ULCAST_(7) << 7) +#define MIPS_CONF_MT_TLB (_ULCAST_(1) << 7) +#define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) #define MIPS_CONF_AR (_ULCAST_(7) << 10) #define MIPS_CONF_AT (_ULCAST_(3) << 13) #define MIPS_CONF_M (_ULCAST_(1) << 31) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 571a8e6ea5bd..09a51d091941 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -410,16 +410,18 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) static inline unsigned int decode_config0(struct cpuinfo_mips *c) { unsigned int config0; - int isa; + int isa, mt; config0 = read_c0_config(); /* * Look for Standard TLB or Dual VTLB and FTLB */ - if ((((config0 & MIPS_CONF_MT) >> 7) == 1) || - (((config0 & MIPS_CONF_MT) >> 7) == 4)) + mt = config0 & MIPS_CONF_MT; + if (mt == MIPS_CONF_MT_TLB) c->options |= MIPS_CPU_TLB; + else if (mt == MIPS_CONF_MT_FTLB) + c->options |= MIPS_CPU_TLB | MIPS_CPU_FTLB; isa = (config0 & MIPS_CONF_AT) >> 13; switch (isa) { @@ -559,15 +561,18 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) if (cpu_has_tlb) { if (((config4 & MIPS_CONF4_IE) >> 29) == 2) c->options |= MIPS_CPU_TLBINV; + /* - * This is a bit ugly. R6 has dropped that field from - * config4 and the only valid configuration is VTLB+FTLB so - * set a good value for mmuextdef for that case. + * R6 has dropped the MMUExtDef field from config4. + * On R6 the fields always describe the FTLB, and only if it is + * present according to Config.MT. */ - if (cpu_has_mips_r6) + if (!cpu_has_mips_r6) + mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + else if (cpu_has_ftlb) mmuextdef = MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT; else - mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + mmuextdef = 0; switch (mmuextdef) { case MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT: diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 35b8316002f8..479515109e5b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -338,7 +338,7 @@ static void __init bootmem_init(void) if (end <= reserved_end) continue; #ifdef CONFIG_BLK_DEV_INITRD - /* mapstart should be after initrd_end */ + /* Skip zones before initrd and initrd itself */ if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) continue; #endif @@ -371,6 +371,14 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } +#ifdef CONFIG_BLK_DEV_INITRD + /* + * mapstart should be after initrd_end + */ + if (initrd_end) + mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); +#endif + /* * Initialize the boot-time allocator with low memory only. */ diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index a31896c33716..bd4385a8e6e8 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -42,6 +42,7 @@ #include <asm/mmu_context.h> #include <asm/time.h> #include <asm/setup.h> +#include <asm/maar.h> cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ @@ -157,6 +158,7 @@ asmlinkage void start_secondary(void) mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); + maar_init(); /* * XXX parity protection should be folded in here when it's converted diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index f6c44dd332e2..d6d07ad56180 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 66d0f49c5bec..8770e619185e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -44,6 +44,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> +#include <asm/maar.h> /* * We have up to 8 empty zeroed pages so we can map one of the right colour @@ -252,6 +253,119 @@ void __init fixrange_init(unsigned long start, unsigned long end, #endif } +unsigned __weak platform_maar_init(unsigned num_pairs) +{ + struct maar_config cfg[BOOT_MEM_MAP_MAX]; + unsigned i, num_configured, num_cfg = 0; + phys_addr_t skip; + + for (i = 0; i < boot_mem_map.nr_map; i++) { + switch (boot_mem_map.map[i].type) { + case BOOT_MEM_RAM: + case BOOT_MEM_INIT_RAM: + break; + default: + continue; + } + + skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); + + cfg[num_cfg].lower = boot_mem_map.map[i].addr; + cfg[num_cfg].lower += skip; + + cfg[num_cfg].upper = cfg[num_cfg].lower; + cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; + cfg[num_cfg].upper -= skip; + + cfg[num_cfg].attrs = MIPS_MAAR_S; + num_cfg++; + } + + num_configured = maar_config(cfg, num_cfg, num_pairs); + if (num_configured < num_cfg) + pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", + num_pairs, num_cfg); + + return num_configured; +} + +void maar_init(void) +{ + unsigned num_maars, used, i; + phys_addr_t lower, upper, attr; + static struct { + struct maar_config cfgs[3]; + unsigned used; + } recorded = { { { 0 } }, 0 }; + + if (!cpu_has_maar) + return; + + /* Detect the number of MAARs */ + write_c0_maari(~0); + back_to_back_c0_hazard(); + num_maars = read_c0_maari() + 1; + + /* MAARs should be in pairs */ + WARN_ON(num_maars % 2); + + /* Set MAARs using values we recorded already */ + if (recorded.used) { + used = maar_config(recorded.cfgs, recorded.used, num_maars / 2); + BUG_ON(used != recorded.used); + } else { + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + } + + /* Disable any further MAARs */ + for (i = (used * 2); i < num_maars; i++) { + write_c0_maari(i); + back_to_back_c0_hazard(); + write_c0_maar(0); + back_to_back_c0_hazard(); + } + + if (recorded.used) + return; + + pr_info("MAAR configuration:\n"); + for (i = 0; i < num_maars; i += 2) { + write_c0_maari(i); + back_to_back_c0_hazard(); + upper = read_c0_maar(); + + write_c0_maari(i + 1); + back_to_back_c0_hazard(); + lower = read_c0_maar(); + + attr = lower & upper; + lower = (lower & MIPS_MAAR_ADDR) << 4; + upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; + + pr_info(" [%d]: ", i / 2); + if (!(attr & MIPS_MAAR_V)) { + pr_cont("disabled\n"); + continue; + } + + pr_cont("%pa-%pa", &lower, &upper); + + if (attr & MIPS_MAAR_S) + pr_cont(" speculate"); + + pr_cont("\n"); + + /* Record the setup for use on secondary CPUs */ + if (used <= ARRAY_SIZE(recorded.cfgs)) { + recorded.cfgs[recorded.used].lower = lower; + recorded.cfgs[recorded.used].upper = upper; + recorded.cfgs[recorded.used].attrs = attr; + recorded.used++; + } + } +} + #ifndef CONFIG_NEED_MULTIPLE_NODES int page_is_ram(unsigned long pagenr) { @@ -334,69 +448,6 @@ static inline void mem_init_free_highmem(void) #endif } -unsigned __weak platform_maar_init(unsigned num_pairs) -{ - struct maar_config cfg[BOOT_MEM_MAP_MAX]; - unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; - - for (i = 0; i < boot_mem_map.nr_map; i++) { - switch (boot_mem_map.map[i].type) { - case BOOT_MEM_RAM: - case BOOT_MEM_INIT_RAM: - break; - default: - continue; - } - - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - - cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; - - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; - - cfg[num_cfg].attrs = MIPS_MAAR_S; - num_cfg++; - } - - num_configured = maar_config(cfg, num_cfg, num_pairs); - if (num_configured < num_cfg) - pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", - num_pairs, num_cfg); - - return num_configured; -} - -static void maar_init(void) -{ - unsigned num_maars, used, i; - - if (!cpu_has_maar) - return; - - /* Detect the number of MAARs */ - write_c0_maari(~0); - back_to_back_c0_hazard(); - num_maars = read_c0_maari() + 1; - - /* MAARs should be in pairs */ - WARN_ON(num_maars % 2); - - /* Configure the required MAARs */ - used = platform_maar_init(num_maars / 2); - - /* Disable any further MAARs */ - for (i = (used * 2); i < num_maars; i++) { - write_c0_maari(i); - back_to_back_c0_hazard(); - write_c0_maar(0); - back_to_back_c0_hazard(); - } -} - void __init mem_init(void) { #ifdef CONFIG_HIGHMEM diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index e92726099be0..dabf4179cd7e 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -64,8 +64,20 @@ sk_load_word_positive: PTR_ADDU t1, $r_skb_data, offset lw $r_A, 0(t1) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -80,8 +92,16 @@ sk_load_half_positive: PTR_ADDU t1, $r_skb_data, offset lh $r_A, 0(t1) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A seh $r_A, t0 +# else + sll t0, $r_A, 24 + andi t1, $r_A, 0xff00 + sra t0, t0, 16 + srl t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -148,23 +168,47 @@ sk_load_byte_positive: NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) bpf_slow_path_common(4) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_s0 jr $r_ra rotr $r_A, t0, 16 -#endif +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else jr $r_ra - move $r_A, $r_s0 + move $r_A, $r_s0 +#endif END(bpf_slow_path_word) NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) bpf_slow_path_common(2) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) jr $r_ra wsbh $r_A, $r_s0 -#endif +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else jr $r_ra move $r_A, $r_s0 +#endif END(bpf_slow_path_half) diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c index f0da5a237e94..9f1e05e12255 100644 --- a/arch/tile/kernel/usb.c +++ b/arch/tile/kernel/usb.c @@ -22,6 +22,7 @@ #include <linux/platform_device.h> #include <linux/usb/tilegx.h> #include <linux/init.h> +#include <linux/module.h> #include <linux/types.h> static u64 ehci_dmamask = DMA_BIT_MASK(32); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ab5f1d447ef9..ae68be92f755 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present * only in kernel binary. Since the EFI stub linked into a separate binary it @@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #undef memcpy #undef memset #undef memmove +#endif #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b98b471a3b7e..b8c14bb7fc8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_PEBS_FRONTEND 0x000003f7 + #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_MC0_CTL 0x00000400 diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 655e07a48f6c..67f08230103a 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -41,6 +41,7 @@ struct pvclock_wall_clock { #define PVCLOCK_TSC_STABLE_BIT (1 << 0) #define PVCLOCK_GUEST_STOPPED (1 << 1) +/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..165be83a7fa4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -47,6 +47,7 @@ enum extra_reg_type { EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ EXTRA_REG_MAX /* number of entries needed */ }; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3fefebfbdf4b..f63360be2238 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + /* + * Note the low 8 bits eventsel code is not a continuous field, containing + * some #GPing bits. These are masked out. + */ + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), EVENT_EXTRA_END }; @@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ + INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ EVENT_CONSTRAINT_END }; @@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); +PMU_FORMAT_ATTR(frontend, "config1:0-23"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static struct attribute *skl_format_attr[] = { + &format_attr_frontend.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, + skl_format_attr); WARN_ON(!x86_pmu.format_attrs); x86_pmu.cpu_events = hsw_events_attrs; pr_cont("Skylake events, "); diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index 086b12eae794..f32ac13934f2 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c @@ -10,12 +10,12 @@ enum perf_msr_id { PERF_MSR_EVENT_MAX, }; -bool test_aperfmperf(int idx) +static bool test_aperfmperf(int idx) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -bool test_intel(int idx) +static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 94b7d15db3fc..2f9ed1ff0632 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -514,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } @@ -866,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } -#define MTRR_TYPE_UC_MINUS 7 -#define MTRR2PROTVAL_INVALID 0xff - -static u8 mtrr2protval[8]; - -static u8 fallback_mtrr_type(int mtrr) -{ - /* - * WT and WP aren't always available in the host PAT. Treat - * them as UC and UC- respectively. Everything else should be - * there. - */ - switch (mtrr) - { - case MTRR_TYPE_WRTHROUGH: - return MTRR_TYPE_UNCACHABLE; - case MTRR_TYPE_WRPROT: - return MTRR_TYPE_UC_MINUS; - default: - BUG(); - } -} - -static void build_mtrr2protval(void) -{ - int i; - u64 pat; - - for (i = 0; i < 8; i++) - mtrr2protval[i] = MTRR2PROTVAL_INVALID; - - /* Ignore the invalid MTRR types. */ - mtrr2protval[2] = 0; - mtrr2protval[3] = 0; - - /* - * Use host PAT value to figure out the mapping from guest MTRR - * values to nested page table PAT/PCD/PWT values. We do not - * want to change the host PAT value every time we enter the - * guest. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - for (i = 0; i < 8; i++) { - u8 mtrr = pat >> (8 * i); - - if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) - mtrr2protval[mtrr] = __cm_idx2pte(i); - } - - for (i = 0; i < 8; i++) { - if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { - u8 fallback = fallback_mtrr_type(i); - mtrr2protval[i] = mtrr2protval[fallback]; - BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); - } - } -} - static __init int svm_hardware_setup(void) { int cpu; @@ -990,7 +932,6 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - build_mtrr2protval(); return 0; err: @@ -1145,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - - /* Unlike Intel, AMD takes the guest's CR0.CD into account. - * - * AMD does not have IPAT. To emulate it for the case of guests - * with no assigned devices, just set everything to WB. If guests - * have assigned devices, however, we cannot force WB for RAM - * pages only, so use the guest PAT directly. - */ - if (!kvm_arch_has_assigned_device(vcpu->kvm)) - *g_pat = 0x0606060606060606; - else - *g_pat = vcpu->arch.pat; -} - -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 mtrr; - - /* - * 1. MMIO: trust guest MTRR, so same as item 3. - * 2. No passthrough: always map as WB, and force guest PAT to WB as well - * 3. Passthrough: can't guarantee the result, try to trust guest. - */ - if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) - return 0; - - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && - kvm_read_cr0(vcpu) & X86_CR0_CD) - return _PAGE_NOCACHE; - - mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - return mtrr2protval[mtrr]; -} - static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1278,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; - svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -1673,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - - /* These are emulated via page tables. */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -3351,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_IA32_CR_PAT: - if (npt_enabled) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) - return 1; - vcpu->arch.pat = data; - svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); - mark_dirty(svm->vmcb, VMCB_NPT); - break; - } - /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -4195,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void) return true; } +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + return 0; +} + static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64076740251e..06ef4908ba61 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8617,17 +8617,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: guest may want to apply WC, trust it. + * 1. MMIO: always map as UC * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. So the same as item 1. + * result, try to trust guest. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (is_mmio) { + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 991466bf8dee..92511d4b7236 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->pvclock_set_guest_stopped_request = false; } - pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; - /* If the host uses TSC clocksource, then it is stable */ if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; @@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &vcpu->requests); ka->boot_vcpu_runs_old_kvmclock = tmp; - - ka->kvmclock_offset = -get_kernel_ns(); } vcpu->arch.time = data; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 6d88dd15c98d..197096632412 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -332,10 +332,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) srlen = cert->raw_serial_size; q = cert->raw_serial; } - if (srlen > 1 && *q == 0) { - srlen--; - q++; - } ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 2614a839c60d..42c66b64c12c 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1044,8 +1044,10 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) goto err_exit; mutex_lock(&ec->mutex); + result = -ENODATA; list_for_each_entry(handler, &ec->list, node) { if (value == handler->query_bit) { + result = 0; q->handler = acpi_ec_get_query_handler(handler); ec_dbg_evt("Query(0x%02x) scheduled", q->handler->query_bit); diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 6da0f9beab19..c9336751e5e3 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -372,6 +372,7 @@ static int acpi_isa_register_gsi(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && + acpi_isa_irq_available(dev->irq) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n", pin_name(dev->pin), dev->irq); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 3b4ea98e3ea0..7c8408b946ca 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -498,8 +498,7 @@ int __init acpi_irq_penalty_init(void) PIRQ_PENALTY_PCI_POSSIBLE; } } - /* Add a penalty for the SCI */ - acpi_irq_penalty[acpi_gbl_FADT.sci_interrupt] += PIRQ_PENALTY_PCI_USING; + return 0; } @@ -553,6 +552,13 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) irq = link->irq.possible[i]; } } + if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) { + printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. " + "Try pci=noacpi or acpi=off\n", + acpi_device_name(link->device), + acpi_device_bid(link->device)); + return -ENODEV; + } /* Attempt to enable the link device at this IRQ. */ if (acpi_pci_link_set(link, irq)) { @@ -821,6 +827,12 @@ void acpi_penalize_isa_irq(int irq, int active) } } +bool acpi_isa_irq_available(int irq) +{ + return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) || + acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS); +} + /* * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 28cd75c535b0..7ae7cd990fbf 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -892,10 +892,17 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) u32 microvolt[3] = {0}; int count, ret; - count = of_property_count_u32_elems(opp->np, "opp-microvolt"); - if (!count) + /* Missing property isn't a problem, but an invalid entry is */ + if (!of_find_property(opp->np, "opp-microvolt", NULL)) return 0; + count = of_property_count_u32_elems(opp->np, "opp-microvolt"); + if (count < 0) { + dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n", + __func__, count); + return count; + } + /* There can be one or three elements here */ if (count != 1 && count != 3) { dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n", @@ -1063,7 +1070,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); * share a common logic which is isolated here. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. * * Locking: The internal device_opp and opp structures are RCU protected. @@ -1151,7 +1158,7 @@ unlock: * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_enable(struct device *dev, unsigned long freq) @@ -1177,7 +1184,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable); * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_disable(struct device *dev, unsigned long freq) diff --git a/drivers/hwmon/abx500.c b/drivers/hwmon/abx500.c index 6cb89c0ebab6..1fd46859ed29 100644 --- a/drivers/hwmon/abx500.c +++ b/drivers/hwmon/abx500.c @@ -470,6 +470,7 @@ static const struct of_device_id abx500_temp_match[] = { { .compatible = "stericsson,abx500-temp" }, {}, }; +MODULE_DEVICE_TABLE(of, abx500_temp_match); #endif static struct platform_driver abx500_temp_driver = { diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index a3dae6d0082a..82de3deeb18a 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -539,6 +539,7 @@ static const struct of_device_id of_gpio_fan_match[] = { { .compatible = "gpio-fan", }, {}, }; +MODULE_DEVICE_TABLE(of, of_gpio_fan_match); #endif /* CONFIG_OF_GPIO */ static int gpio_fan_probe(struct platform_device *pdev) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 2d9a712699ff..3e23003f78b0 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -323,6 +323,7 @@ static const struct of_device_id of_pwm_fan_match[] = { { .compatible = "pwm-fan", }, {}, }; +MODULE_DEVICE_TABLE(of, of_pwm_fan_match); static struct platform_driver pwm_fan_driver = { .probe = pwm_fan_probe, diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3a3738fe016b..cd4510a63375 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -620,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = { .name = "C6-SKL", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 75, + .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, @@ -636,11 +636,19 @@ static struct cpuidle_state skl_cstates[] = { .name = "C8-SKL", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 174, + .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { + .name = "C9-SKL", + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { .name = "C10-SKL", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 41d6911e244e..f1ccd40beae9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -245,7 +245,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; @@ -795,53 +794,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) -{ - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; - struct mlx5_core_mr mr; - int err; - - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - seg = &in->seg; - seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; - - err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in), - NULL, NULL, NULL); - if (err) { - mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); - goto err_in; - } - - kfree(in); - *key = mr.key; - - return 0; - -err_in: - kfree(in); - - return err; -} - -static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) -{ - struct mlx5_core_mr mr; - int err; - - memset(&mr, 0, sizeof(mr)); - mr.key = key; - err = mlx5_core_destroy_mkey(dev->mdev, &mr); - if (err) - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); -} - static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -867,13 +819,6 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(-EFAULT); } - } else { - err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); - if (err) { - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); - kfree(pd); - return ERR_PTR(err); - } } return &pd->ibpd; @@ -884,9 +829,6 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - if (!pd->uobject) - free_pa_mkey(mdev, mpd->pa_lkey); - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); kfree(mpd); @@ -1245,18 +1187,10 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; - u32 rsvd_lkey; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); - ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey); - if (ret) { - pr_err("Failed to query special context %d\n", ret); - return ret; - } - dev->ib_dev.local_dma_lkey = rsvd_lkey; - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); @@ -1418,6 +1352,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bb8cda79e881..22123b79d550 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -103,7 +103,6 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; - u32 pa_lkey; }; /* Use macros here so that don't have to duplicate @@ -213,7 +212,6 @@ struct mlx5_ib_qp { int uuarn; int create_type; - u32 pa_lkey; /* Store signature errors */ bool signature_en; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c745c6c5e10d..6f521a3418e8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -925,8 +925,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); if (err) mlx5_ib_dbg(dev, "err %d\n", err); - else - qp->pa_lkey = to_mpd(pd)->pa_lkey; } if (err) @@ -2045,7 +2043,7 @@ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); dseg->addr = cpu_to_be64(mfrpl->map); dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); - dseg->lkey = cpu_to_be32(pd->pa_lkey); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } static __be32 send_ieth(struct ib_send_wr *wr) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca2873698d75..4cd5428a2399 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -80,7 +80,7 @@ enum { IPOIB_NUM_WC = 4, IPOIB_MAX_PATH_REC_QUEUE = 3, - IPOIB_MAX_MCAST_QUEUE = 3, + IPOIB_MAX_MCAST_QUEUE = 64, IPOIB_FLAG_OPER_UP = 0, IPOIB_FLAG_INITIALIZED = 1, @@ -548,6 +548,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast); +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 36536ce5a3e2..f74316e679d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1149,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) unsigned long dt; unsigned long flags; int i; + LIST_HEAD(remove_list); + struct ipoib_mcast *mcast, *tmcast; + struct net_device *dev = priv->dev; if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; @@ -1176,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { + u8 *mgid = neigh->daddr + 4; + + /* Is this multicast ? */ + if (*mgid == 0xff) { + mcast = __ipoib_mcast_find(dev, mgid); + + if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { + list_del(&mcast->list); + rb_erase(&mcast->rb_node, &priv->multicast_tree); + list_add_tail(&mcast->list, &remove_list); + } + } + rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); @@ -1191,6 +1207,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) out_unlock: spin_unlock_irqrestore(&priv->lock, flags); + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) + ipoib_mcast_leave(dev, mcast); } static void ipoib_reap_neigh(struct work_struct *work) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 09a1748f9d13..136cbefe00f8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, return mcast; } -static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->multicast_tree.rb_node; @@ -508,17 +508,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.hop_limit = priv->broadcast->mcmember.hop_limit; /* - * Historically Linux IPoIB has never properly supported SEND - * ONLY join. It emulated it by not providing all the required - * attributes, which is enough to prevent group creation and - * detect if there are full members or not. A major problem - * with supporting SEND ONLY is detecting when the group is - * auto-destroyed as IPoIB will cache the MLID.. + * Send-only IB Multicast joins do not work at the core + * IB layer yet, so we can't use them here. However, + * we are emulating an Ethernet multicast send, which + * does not require a multicast subscription and will + * still send properly. The most appropriate thing to + * do is to create the group if it doesn't exist as that + * most closely emulates the behavior, from a user space + * application perspecitive, of Ethernet multicast + * operation. For now, we do a full join, maybe later + * when the core IB layers support send only joins we + * will use them. */ -#if 1 - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; -#else +#if 0 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = 4; #endif @@ -675,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev) return 0; } -static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret = 0; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1ace5d83a4d7..f58ff96b6cbb 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -97,6 +97,11 @@ unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); +bool iser_always_reg = true; +module_param_named(always_register, iser_always_reg, bool, S_IRUGO); +MODULE_PARM_DESC(always_register, + "Always register memory, even for continuous memory regions (default:true)"); + bool iser_pi_enable = false; module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 86f6583485ef..a5edd6ede692 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -611,6 +611,7 @@ extern int iser_debug_level; extern bool iser_pi_enable; extern int iser_pi_guard; extern unsigned int iser_max_sectors; +extern bool iser_always_reg; int iser_assign_reg_ops(struct iser_device *device); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2493cc748db8..4c46d67d37a1 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -803,11 +803,12 @@ static int iser_reg_prot_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, struct iser_fr_desc *desc, + bool use_dma_key, struct iser_mem_reg *reg) { struct iser_device *device = task->iser_conn->ib_conn.device; - if (mem->dma_nents == 1) + if (use_dma_key) return iser_reg_dma(device, mem, reg); return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg); @@ -817,11 +818,12 @@ static int iser_reg_data_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, struct iser_fr_desc *desc, + bool use_dma_key, struct iser_mem_reg *reg) { struct iser_device *device = task->iser_conn->ib_conn.device; - if (mem->dma_nents == 1) + if (use_dma_key) return iser_reg_dma(device, mem, reg); return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg); @@ -836,14 +838,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, struct iser_mem_reg *reg = &task->rdma_reg[dir]; struct iser_mem_reg *data_reg; struct iser_fr_desc *desc = NULL; + bool use_dma_key; int err; err = iser_handle_unaligned_buf(task, mem, dir); if (unlikely(err)) return err; - if (mem->dma_nents != 1 || - scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) { + use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && + scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); + + if (!use_dma_key) { desc = device->reg_ops->reg_desc_get(ib_conn); reg->mem_h = desc; } @@ -853,7 +858,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, else data_reg = &task->desc.data_reg; - err = iser_reg_data_sg(task, mem, desc, data_reg); + err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg); if (unlikely(err)) goto err_reg; @@ -866,7 +871,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, if (unlikely(err)) goto err_reg; - err = iser_reg_prot_sg(task, mem, desc, prot_reg); + err = iser_reg_prot_sg(task, mem, desc, + use_dma_key, prot_reg); if (unlikely(err)) goto err_reg; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ae70cc1463ac..85132d867bc8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -133,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device) (unsigned long)comp); } - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - if (IS_ERR(device->mr)) - goto dma_mr_err; + if (!iser_always_reg) { + int access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + + device->mr = ib_get_dma_mr(device->pd, access); + if (IS_ERR(device->mr)) + goto dma_mr_err; + } INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, iser_event_handler); @@ -147,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device) return 0; handler_err: - ib_dereg_mr(device->mr); + if (device->mr) + ib_dereg_mr(device->mr); dma_mr_err: for (i = 0; i < device->comps_used; i++) tasklet_kill(&device->comps[i].tasklet); @@ -173,7 +178,6 @@ comps_err: static void iser_free_device_ib_res(struct iser_device *device) { int i; - BUG_ON(device->mr == NULL); for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; @@ -184,7 +188,8 @@ static void iser_free_device_ib_res(struct iser_device *device) } (void)ib_unregister_event_handler(&device->event_handler); - (void)ib_dereg_mr(device->mr); + if (device->mr) + (void)ib_dereg_mr(device->mr); ib_dealloc_pd(device->pd); kfree(device->comps); diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index 56eb471b5576..4215b5382092 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY config JOYSTICK_ZHENHUA tristate "5-byte Zhenhua RC transmitter" select SERIO + select BITREVERSE help Say Y here if you have a Zhen Hua PPM-4CH transmitter which is supplied with a ready to fly micro electric indoor helicopters diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 4664c2a96c67..d9da766719c8 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -43,7 +43,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST endmenu config IOMMU_IOVA - bool + tristate config OF_IOMMU def_bool y diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2d7349a3ee14..041bc1810a86 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3215,6 +3215,8 @@ static struct iova *intel_alloc_iova(struct device *dev, /* Restrict dma_mask to the width that the iommu can handle */ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + /* Ensure we reserve the whole size-aligned region */ + nrpages = __roundup_pow_of_two(nrpages); if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { /* @@ -3711,7 +3713,7 @@ static inline int iommu_devinfo_cache_init(void) static int __init iommu_init_mempool(void) { int ret; - ret = iommu_iova_cache_init(); + ret = iova_cache_get(); if (ret) return ret; @@ -3725,7 +3727,7 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - iommu_iova_cache_destroy(); + iova_cache_put(); return -ENOMEM; } @@ -3734,7 +3736,7 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - iommu_iova_cache_destroy(); + iova_cache_put(); } static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7c3d923f3e1..fa0adef32bd6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -18,42 +18,9 @@ */ #include <linux/iova.h> +#include <linux/module.h> #include <linux/slab.h> -static struct kmem_cache *iommu_iova_cache; - -int iommu_iova_cache_init(void) -{ - int ret = 0; - - iommu_iova_cache = kmem_cache_create("iommu_iova", - sizeof(struct iova), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_iova_cache) { - pr_err("Couldn't create iova cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -void iommu_iova_cache_destroy(void) -{ - kmem_cache_destroy(iommu_iova_cache); -} - -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); -} - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iommu_iova_cache, iova); -} - void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit) @@ -72,6 +39,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } +EXPORT_SYMBOL_GPL(init_iova_domain); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) @@ -120,19 +88,14 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) } } -/* Computes the padding size required, to make the - * the start address naturally aligned on its size +/* + * Computes the padding size required, to make the start address + * naturally aligned on the power-of-two order of its size */ -static int -iova_get_pad_size(int size, unsigned int limit_pfn) +static unsigned int +iova_get_pad_size(unsigned int size, unsigned int limit_pfn) { - unsigned int pad_size = 0; - unsigned int order = ilog2(size); - - if (order) - pad_size = (limit_pfn + 1) % (1 << order); - - return pad_size; + return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); } static int __alloc_and_insert_iova_range(struct iova_domain *iovad, @@ -242,6 +205,57 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) rb_insert_color(&iova->node, root); } +static struct kmem_cache *iova_cache; +static unsigned int iova_cache_users; +static DEFINE_MUTEX(iova_cache_mutex); + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iova_cache, GFP_ATOMIC); +} +EXPORT_SYMBOL(alloc_iova_mem); + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iova_cache, iova); +} +EXPORT_SYMBOL(free_iova_mem); + +int iova_cache_get(void) +{ + mutex_lock(&iova_cache_mutex); + if (!iova_cache_users) { + iova_cache = kmem_cache_create( + "iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) { + mutex_unlock(&iova_cache_mutex); + printk(KERN_ERR "Couldn't create iova cache\n"); + return -ENOMEM; + } + } + + iova_cache_users++; + mutex_unlock(&iova_cache_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(iova_cache_get); + +void iova_cache_put(void) +{ + mutex_lock(&iova_cache_mutex); + if (WARN_ON(!iova_cache_users)) { + mutex_unlock(&iova_cache_mutex); + return; + } + iova_cache_users--; + if (!iova_cache_users) + kmem_cache_destroy(iova_cache); + mutex_unlock(&iova_cache_mutex); +} +EXPORT_SYMBOL_GPL(iova_cache_put); + /** * alloc_iova - allocates an iova * @iovad: - iova domain in question @@ -265,12 +279,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; - /* If size aligned is set then round the size to - * to next power of two. - */ - if (size_aligned) - size = __roundup_pow_of_two(size); - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); @@ -281,6 +289,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, return new_iova; } +EXPORT_SYMBOL_GPL(alloc_iova); /** * find_iova - find's an iova for a given pfn @@ -321,6 +330,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return NULL; } +EXPORT_SYMBOL_GPL(find_iova); /** * __free_iova - frees the given iova @@ -339,6 +349,7 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); free_iova_mem(iova); } +EXPORT_SYMBOL_GPL(__free_iova); /** * free_iova - finds and frees the iova for a given pfn @@ -356,6 +367,7 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) __free_iova(iovad, iova); } +EXPORT_SYMBOL_GPL(free_iova); /** * put_iova_domain - destroys the iova doamin @@ -378,6 +390,7 @@ void put_iova_domain(struct iova_domain *iovad) } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } +EXPORT_SYMBOL_GPL(put_iova_domain); static int __is_range_overlap(struct rb_node *node, @@ -467,6 +480,7 @@ finish: spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } +EXPORT_SYMBOL_GPL(reserve_iova); /** * copy_reserved_iova - copies the reserved between domains @@ -493,6 +507,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } +EXPORT_SYMBOL_GPL(copy_reserved_iova); struct iova * split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, @@ -534,3 +549,6 @@ error: free_iova_mem(prev); return NULL; } + +MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index af2f16bb8a94..aeaa061f0dbf 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -320,6 +320,14 @@ static void gic_handle_shared_int(bool chained) intrmask[i] = gic_read(intrmask_reg); pending_reg += gic_reg_step; intrmask_reg += gic_reg_step; + + if (!config_enabled(CONFIG_64BIT) || mips_cm_is64) + continue; + + pending[i] |= (u64)gic_read(pending_reg) << 32; + intrmask[i] |= (u64)gic_read(intrmask_reg) << 32; + pending_reg += gic_reg_step; + intrmask_reg += gic_reg_step; } bitmap_and(pending, pending, intrmask, gic_shared_intrs); @@ -426,7 +434,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, spin_lock_irqsave(&gic_lock, flags); /* Re-route this IRQ */ - gic_map_to_vpe(irq, cpumask_first(&tmp)); + gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp))); /* Update the pcpu_masks */ for (i = 0; i < NR_CPUS; i++) @@ -599,7 +607,7 @@ static __init void gic_ipi_init_one(unsigned int intr, int cpu, GIC_SHARED_TO_HWIRQ(intr)); int i; - gic_map_to_vpe(intr, cpu); + gic_map_to_vpe(intr, mips_cm_vp_id(cpu)); for (i = 0; i < NR_CPUS; i++) clear_bit(intr, pcpu_masks[i].pcpu_mask); set_bit(intr, pcpu_masks[cpu].pcpu_mask); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 5bbd1f094f4e..1fc23e48fe8e 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -926,6 +926,11 @@ static int validate_vid_hdr(const struct ubi_device *ubi, goto bad; } + if (data_size > ubi->leb_size) { + ubi_err(ubi, "bad data_size"); + goto bad; + } + if (vol_type == UBI_VID_STATIC) { /* * Although from high-level point of view static volumes may diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 80bdd5b88bac..d85c19762160 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -649,6 +649,7 @@ static int init_volumes(struct ubi_device *ubi, if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 275d9fb6fe5c..eb4489f9082f 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1601,6 +1601,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index f8baa897d1a0..1f7dd927cc5e 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2051,6 +2051,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; else reg |= PORT_CONTROL_FRAME_MODE_DSA; + reg |= PORT_CONTROL_FORWARD_UNKNOWN | + PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index b7a0f7879de2..9e59663a6ead 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1543,7 +1543,7 @@ bfa_flash_cmd_act_check(void __iomem *pci_bar) } /* Flush FLI data fifo. */ -static u32 +static int bfa_flash_fifo_flush(void __iomem *pci_bar) { u32 i; @@ -1573,11 +1573,11 @@ bfa_flash_fifo_flush(void __iomem *pci_bar) } /* Read flash status. */ -static u32 +static int bfa_flash_status_read(void __iomem *pci_bar) { union bfa_flash_dev_status_reg dev_status; - u32 status; + int status; u32 ret_status; int i; @@ -1611,11 +1611,11 @@ bfa_flash_status_read(void __iomem *pci_bar) } /* Start flash read operation. */ -static u32 +static int bfa_flash_read_start(void __iomem *pci_bar, u32 offset, u32 len, char *buf) { - u32 status; + int status; /* len must be mutiple of 4 and not exceeding fifo size */ if (len == 0 || len > BFA_FLASH_FIFO_SIZE || (len & 0x03) != 0) @@ -1703,7 +1703,8 @@ static enum bfa_status bfa_flash_raw_read(void __iomem *pci_bar, u32 offset, char *buf, u32 len) { - u32 n, status; + u32 n; + int status; u32 off, l, s, residue, fifo_sz; residue = len; diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index cc2d8b4b18e3..253f8ed0537a 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -816,7 +816,7 @@ static int hip04_mac_probe(struct platform_device *pdev) struct net_device *ndev; struct hip04_priv *priv; struct resource *res; - unsigned int irq; + int irq; int ret; ndev = alloc_etherdev(sizeof(struct hip04_priv)); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 28df37420da9..ac02c675c59c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4SYNC_ETHTOOL_REGS_VER 2 +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 3e0d20037675..62488a67149d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -946,6 +946,13 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK); if (ntu == ntc) { @@ -1007,6 +1014,8 @@ clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); if (i40e_is_nvm_update_op(&e->desc)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 851c1a159be8..2fdf978ae6a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2672,7 +2672,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.lrxqthresh = 2; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; - rx_ctx.showiv = 1; + /* this controls whether VLAN is stripped from inner headers */ + rx_ctx.showiv = 0; #ifdef I40E_FCOE rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE); #endif diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index f08450b90774..929d47152bf2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -887,6 +887,13 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK); if (ntu == ntc) { @@ -948,6 +955,8 @@ clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); return ret_code; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index bd9ea0d01aae..1d4e2e054647 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1184,10 +1184,11 @@ out: if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) - new_steering_entry(dev, port, steer, index, qp->qpn); + err = new_steering_entry(dev, port, steer, + index, qp->qpn); else - existing_steering_entry(dev, port, steer, - index, qp->qpn); + err = existing_steering_entry(dev, port, steer, + index, qp->qpn); } if (err && link && index != -1) { if (index < dev->caps.num_mgms) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index aa0d5ffe92d8..9335e5ae18cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -200,25 +200,3 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) return err; } - -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey) -{ - struct mlx5_cmd_query_special_contexts_mbox_in in; - struct mlx5_cmd_query_special_contexts_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - *rsvd_lkey = be32_to_cpu(out.resd_lkey); - - return err; -} -EXPORT_SYMBOL(mlx5_core_query_special_context); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2b32e0c5a0b4..b4f21232019a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6081,7 +6081,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; - u16 rg_saw_cnt; + int rg_saw_cnt; u32 data; static const struct ephy_info e_info_8168h_1[] = { { 0x1e, 0x0800, 0x0001 }, diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd652f2ae03d..108a3118ace7 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -299,9 +299,10 @@ static long local_pci_probe(void *_ddi) * Unbound PCI devices are always put in D0, regardless of * runtime PM status. During probe, the device is set to * active and the usage count is incremented. If the driver - * supports runtime PM, it should call pm_runtime_put_noidle() - * in its probe routine and pm_runtime_get_noresume() in its - * remove routine. + * supports runtime PM, it should call pm_runtime_put_noidle(), + * or any other runtime PM helper function decrementing the usage + * count, in its probe routine and pm_runtime_get_noresume() in + * its remove routine. */ pm_runtime_get_sync(dev); pci_dev->driver = pci_drv; diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 7ff96270c933..e570ff084add 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz, switch_on_temp = 0; temperature_threshold = control_temp - switch_on_temp; + /* + * estimate_pid_constants() tries to find appropriate default + * values for thermal zones that don't provide them. If a + * system integrator has configured a thermal zone with two + * passive trip points at the same temperature, that person + * hasn't put any effort to set up the thermal zone properly + * so just give up. + */ + if (!temperature_threshold) + return; if (!tz->tzp->k_po || force) tz->tzp->k_po = int_to_frac(sustainable_power) / diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c68edc16aa54..79e1aa1b0959 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -817,8 +817,9 @@ config ITCO_WDT tristate "Intel TCO Timer/Watchdog" depends on (X86 || IA64) && PCI select WATCHDOG_CORE + depends on I2C || I2C=n select LPC_ICH if !EXPERT - select I2C_I801 if !EXPERT + select I2C_I801 if !EXPERT && I2C ---help--- Hardware driver for the intel TCO timer based watchdog devices. These drivers are included in the Intel 82801 I/O Controller diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index 66c3e656a616..8a5ce5b5a0b6 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -36,6 +36,13 @@ #define PM_RSTC_WRCFG_FULL_RESET 0x00000020 #define PM_RSTC_RESET 0x00000102 +/* + * The Raspberry Pi firmware uses the RSTS register to know which partiton + * to boot from. The partiton value is spread into bits 0, 2, 4, 6, 8, 10. + * Partiton 63 is a special partition used by the firmware to indicate halt. + */ +#define PM_RSTS_RASPBERRYPI_HALT 0x555 + #define SECS_TO_WDOG_TICKS(x) ((x) << 16) #define WDOG_TICKS_TO_SECS(x) ((x) >> 16) @@ -151,8 +158,7 @@ static void bcm2835_power_off(void) * hard reset. */ val = readl_relaxed(wdt->base + PM_RSTS); - val &= PM_RSTC_WRCFG_CLR; - val |= PM_PASSWORD | PM_RSTS_HADWRH_SET; + val |= PM_PASSWORD | PM_RSTS_RASPBERRYPI_HALT; writel_relaxed(val, wdt->base + PM_RSTS); /* Continue with normal reset mechanism */ diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index cc1bdfc2ff71..006e2348022c 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -303,6 +303,7 @@ static const struct of_device_id gef_wdt_ids[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, gef_wdt_ids); static struct platform_driver gef_wdt_driver = { .driver = { diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c index 69013007dc47..098fa9c34d6d 100644 --- a/drivers/watchdog/mena21_wdt.c +++ b/drivers/watchdog/mena21_wdt.c @@ -253,6 +253,7 @@ static const struct of_device_id a21_wdt_ids[] = { { .compatible = "men,a021-wdt" }, { }, }; +MODULE_DEVICE_TABLE(of, a21_wdt_ids); static struct platform_driver a21_wdt_driver = { .probe = a21_wdt_probe, diff --git a/drivers/watchdog/moxart_wdt.c b/drivers/watchdog/moxart_wdt.c index 2789da2c0515..60b0605bd7e6 100644 --- a/drivers/watchdog/moxart_wdt.c +++ b/drivers/watchdog/moxart_wdt.c @@ -168,6 +168,7 @@ static const struct of_device_id moxart_watchdog_match[] = { { .compatible = "moxa,moxart-watchdog" }, { }, }; +MODULE_DEVICE_TABLE(of, moxart_watchdog_match); static struct platform_driver moxart_wdt_driver = { .probe = moxart_wdt_probe, @@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) goto fallback; + sector = bh.b_blocknr << (blkbits - 9); + if (buffer_unwritten(&bh) || buffer_new(&bh)) { int i; + + length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, + bh.b_size); + if (length < 0) { + result = VM_FAULT_SIGBUS; + goto out; + } + if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) + goto fallback; + for (i = 0; i < PTRS_PER_PMD; i++) clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); wmb_pmem(); @@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, result = VM_FAULT_NOPAGE; spin_unlock(ptl); } else { - sector = bh.b_blocknr << (blkbits - 9); length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, bh.b_size); if (length < 0) { diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 96f3448b6eb4..fd65b3f1923c 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, { int err; - mutex_lock(&inode->i_mutex); err = security_inode_init_security(inode, dentry, qstr, &init_xattrs, 0); - mutex_unlock(&inode->i_mutex); - if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..43856d19cf4d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..6452ff4c463f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_cmd_query_special_contexts_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_special_contexts_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 resd_lkey; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27b53f9a24ad..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6f0dc9..80001de019ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff476515f716..581abf848566 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -230,12 +230,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); #define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - const int __n = ARRAY_SIZE(__crcu_array); \ - struct rcu_synchronize __rs_array[__n]; \ - \ - __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 4a167b30a12f..cb1b9bbda332 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -63,7 +63,11 @@ struct unix_sock { #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; -#define unix_sk(__sk) ((struct unix_sock *)__sk) + +static inline struct unix_sock *unix_sk(struct sock *sk) +{ + return (struct unix_sock *)sk; +} #define peer_wait peer_wq.wait diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index df0e09bb7dd5..9057d7af3ae1 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -11,8 +11,6 @@ #include <linux/types.h> -#include <linux/compiler.h> - #define UFFD_API ((__u64)0xAA) /* * After implementing the respective features it will become: diff --git a/ipc/msg.c b/ipc/msg.c index 66c4f567eb73..1471db9a7e61 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -137,13 +137,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* ipc_addid() locks msq upon success. */ - id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); - if (id < 0) { - ipc_rcu_putref(msq, msg_rcu_free); - return id; - } - msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -153,6 +146,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); + /* ipc_addid() locks msq upon success. */ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { + ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); diff --git a/ipc/shm.c b/ipc/shm.c index 222131e8e38f..41787276e141 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -551,12 +551,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -565,6 +559,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* diff --git a/ipc/util.c b/ipc/util.c index be4230020a1f..0f401d94b7c6 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -237,6 +237,10 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + id = idr_alloc(&ids->ipcs_idr, new, (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); @@ -249,10 +253,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - if (next_id < 0) { new->seq = ids->seq++; if (ids->seq > IPCID_SEQ_MAX) diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..b11756f9b6dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -/* - * Called at perf_event creation and when events are attached/detached from a - * group. - */ -static void perf_event__read_size(struct perf_event *event) +static void __perf_event_read_size(struct perf_event *event, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; @@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) entry += sizeof(u64); if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; + nr += nr_siblings; size += sizeof(u64); } @@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) event->read_size = size; } -static void perf_event__header_size(struct perf_event *event) +static void __perf_event_header_size(struct perf_event *event, u64 sample_type) { struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; u16 size = 0; - perf_event__read_size(event); - if (sample_type & PERF_SAMPLE_IP) size += sizeof(data->ip); @@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) event->header_size = size; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group. + */ +static void perf_event__header_size(struct perf_event *event) +{ + __perf_event_read_size(event, + event->group_leader->nr_siblings); + __perf_event_header_size(event, event->attr.sample_type); +} + static void perf_event__id_header_size(struct perf_event *event) { struct perf_sample_data *data; @@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +static bool perf_event_validate_size(struct perf_event *event) +{ + /* + * The values computed here will be over-written when we actually + * attach the event. + */ + __perf_event_read_size(event, event->group_leader->nr_siblings + 1); + __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); + perf_event__id_header_size(event); + + /* + * Sum the lot; should not exceed the 64k limit we have on records. + * Conservative limit to allow for callchains and other variable fields. + */ + if (event->read_size + event->header_size + + event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + return false; + + return true; +} + static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; @@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { gctx = group_leader->ctx; + mutex_lock_double(&gctx->mutex, &ctx->mutex); + } else { + mutex_lock(&ctx->mutex); + } + if (!perf_event_validate_size(event)) { + err = -E2BIG; + goto err_locked; + } + + /* + * Must be under the same ctx::mutex as perf_install_in_context(), + * because we need to serialize with concurrent event creation. + */ + if (!exclusive_event_installable(event, ctx)) { + /* exclusive and group stuff are assumed mutually exclusive */ + WARN_ON_ONCE(move_group); + + err = -EBUSY; + goto err_locked; + } + + WARN_ON_ONCE(ctx->parent_ctx); + + if (move_group) { /* * See perf_event_ctx_lock() for comments on the details * of swizzling perf_event::ctx. */ - mutex_lock_double(&gctx->mutex, &ctx->mutex); - perf_remove_from_context(group_leader, false); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(sibling, false); put_ctx(gctx); } - } else { - mutex_lock(&ctx->mutex); - } - WARN_ON_ONCE(ctx->parent_ctx); - - if (move_group) { /* * Wait for everybody to stop referencing the events through * the old lists, before installing it on new lists. @@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); - } - if (!exclusive_event_installable(event, ctx)) { - err = -EBUSY; - mutex_unlock(&ctx->mutex); - fput(event_file); - goto err_context; + /* + * Now that all events are installed in @ctx, nothing + * references @gctx anymore, so drop the last reference we have + * on it. + */ + put_ctx(gctx); } + /* + * Precalculate sample_data sizes; do while holding ctx::mutex such + * that we're serialized against further additions and before + * perf_install_in_context() which is the point the event is active and + * can use these values. + */ + perf_event__header_size(event); + perf_event__id_header_size(event); + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); - if (move_group) { + if (move_group) mutex_unlock(&gctx->mutex); - put_ctx(gctx); - } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(¤t->perf_event_mutex); /* - * Precalculate sample_data sizes - */ - perf_event__header_size(event); - perf_event__id_header_size(event); - - /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction * of the group leader will find the pointer to itself in @@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open, fd_install(event_fd, event_file); return event_fd; +err_locked: + if (move_group) + mutex_unlock(&gctx->mutex); + mutex_unlock(&ctx->mutex); +/* err_file: */ + fput(event_file); err_context: perf_unpin_context(ctx); put_ctx(ctx); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8acfbf773e06..4e49cc4c9952 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references) + int references, int pin_count) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->waittime_stamp = 0; hlock->holdtime_stamp = lockstat_clock(); #endif - hlock->pin_count = 0; + hlock->pin_count = pin_count; if (check && !mark_irqflags(curr, hlock)) return 0; @@ -3343,7 +3343,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3433,7 +3433,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9f75f25cc5d9..775d36cc0050 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { + static struct lock_class_key rcu_exp_sched_rdp_class; unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); @@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) mutex_init(&rdp->exp_funnel_mutex); rcu_boot_init_nocb_percpu_data(rdp); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (rsp == &rcu_sched_state) + lockdep_set_class_and_name(&rdp->exp_funnel_mutex, + &rcu_exp_sched_rdp_class, + "rcu_data_exp_sched"); } /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2f9c92884817..615953141951 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4934,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - do_set_cpus_allowed(idle, cpumask_of(cpu)); +#ifdef CONFIG_SMP + /* + * Its possible that init_idle() gets called multiple times on a task, + * in that case do_set_cpus_allowed() will not do the right thing. + * + * And since this is boot we can forgo the serialization. + */ + set_cpus_allowed_common(idle, cpumask_of(cpu)); +#endif /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -4951,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu) rq->curr = rq->idle = idle; idle->on_rq = TASK_ON_RQ_QUEUED; -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP idle->on_cpu = 1; #endif raw_spin_unlock(&rq->lock); @@ -4966,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu) idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); vtime_init_idle(idle, cpu); -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif } diff --git a/mm/dmapool.c b/mm/dmapool.c index 71a8998cd03a..312a716fa14c 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma) list_for_each_entry(page, &pool->page_list, page_list) { if (dma < page->dma) continue; - if (dma < (page->dma + pool->allocation)) + if ((dma - page->dma) < pool->allocation) return page; } return NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 999fb0aef8f1..9cc773483624 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, continue; /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these * areas. This is because a future no-page fault on this VMA diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ddaeba34e09..1fedbde68f59 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) } /* + * Return page count for single (non recursive) @memcg. + * * Implementation Note: reading percpu statistics for memcg. * * Both of vmstat[] and percpu_counter has threshold and do periodic * synchronization to implement "quick" read. There are trade-off between * reading cost and precision of value. Then, we may have a chance to implement - * a periodic synchronizion of counter in memcg's counter. + * a periodic synchronization of counter in memcg's counter. * * But this _read() function is used for user interface now. The user accounts * memory usage by memory cgroup and he _always_ requires exact value because @@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) * * If there are kernel internal actions which can make use of some not-exact * value, and reading all cpu value can be performance bottleneck in some - * common workload, threashold and synchonization as vmstat[] should be + * common workload, threshold and synchronization as vmstat[] should be * implemented. */ -static long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static unsigned long +mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { long val = 0; int cpu; + /* Per-cpu values can be negative, use a signed accumulator */ for_each_possible_cpu(cpu) val += per_cpu(memcg->stat->count[idx], cpu); + /* + * Summing races with updates, so val may be negative. Avoid exposing + * transient negative values. + */ + if (val < 0) + val = 0; return val; } @@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i], + pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], K(mem_cgroup_read_stat(iter, i))); } @@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; - long val = 0; + unsigned long val = 0; - /* Per-cpu values can be negative, use a signed accumulator */ for_each_mem_cgroup_tree(iter, memcg) val += mem_cgroup_read_stat(iter, idx); - if (val < 0) /* race ? */ - val = 0; return val; } @@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], + seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); } @@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v) (u64)memsw * PAGE_SIZE); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - long long val = 0; + unsigned long long val = 0; if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; for_each_mem_cgroup_tree(mi, memcg) val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; - seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); + seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val); } for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { @@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: diff --git a/mm/migrate.c b/mm/migrate.c index 7452a00bbb50..842ecd7aaf7f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + /* + * Indirectly called below, migrate_page_copy() copies PG_dirty and thus + * needs newpage's memcg set to transfer memcg dirty page accounting. + * So perform memcg migration in two steps: + * 1. set newpage->mem_cgroup (here) + * 2. clear page->mem_cgroup (below) + */ + set_page_memcg(newpage, page_memcg(page)); + mapping = page_mapping(page); if (!mapping) rc = migrate_page(mapping, newpage, page, mode); @@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page, rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc != MIGRATEPAGE_SUCCESS) { + set_page_memcg(newpage, NULL); newpage->mapping = NULL; } else { - mem_cgroup_migrate(page, newpage, false); + set_page_memcg(page, NULL); if (page_was_mapped) remove_migration_ptes(page, newpage); page->mapping = NULL; diff --git a/mm/slab.c b/mm/slab.c index c77ebe6cc87c..4fcc5dd8d5a6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. + */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 805a95a48107..830f8a7c1cb1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -31,7 +31,6 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; -static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -202,7 +201,7 @@ static ssize_t speed_show(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dad4dd37e2aa..fab4599ba8b2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cce97385f743..7d91f4612ac0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -458,12 +458,17 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) static int dsa_slave_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - int ret = 0; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret; switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_COMMIT) - ret = dsa_slave_stp_update(dev, attr->u.stp_state); + if (attr->trans == SWITCHDEV_TRANS_PREPARE) + ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; + else + ret = ds->drv->port_stp_update(ds, p->port, + attr->u.stp_state); break; default: ret = -EOPNOTSUPP; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6fcbd215cdbc..690bcbc59f26 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -340,6 +340,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; + fl4.flowi4_flags = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c6ad99ad0ffb..c81deb85acb4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1737,6 +1737,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res, 0); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f204089e854c..cb32ce250db0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1193,7 +1193,8 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, fl6->flowi6_iif = LOOPBACK_IFINDEX; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || + fl6->flowi6_oif) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl6->saddr)) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f6b090df3930..afca2eb4dfa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1341,6 +1341,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1636,8 +1638,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 197c3f59ecbf..b00f1f9611d6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1208,20 +1208,22 @@ void sctp_assoc_update(struct sctp_association *asoc, * within this document. * * Our basic strategy is to round-robin transports in priorities - * according to sctp_state_prio_map[] e.g., if no such + * according to sctp_trans_score() e.g., if no such * transport with state SCTP_ACTIVE exists, round-robin through * SCTP_UNKNOWN, etc. You get the picture. */ -static const u8 sctp_trans_state_to_prio_map[] = { - [SCTP_ACTIVE] = 3, /* best case */ - [SCTP_UNKNOWN] = 2, - [SCTP_PF] = 1, - [SCTP_INACTIVE] = 0, /* worst case */ -}; - static u8 sctp_trans_score(const struct sctp_transport *trans) { - return sctp_trans_state_to_prio_map[trans->state]; + switch (trans->state) { + case SCTP_ACTIVE: + return 3; /* best case */ + case SCTP_UNKNOWN: + return 2; + case SCTP_PF: + return 1; + default: /* case SCTP_INACTIVE */ + return 0; /* worst case */ + } } static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 35df1266bf07..6098d4c42fa9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -388,11 +391,11 @@ void sctp_generate_heartbeat_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); - if (error) - asoc->base.sk->sk_err = -error; + if (error) + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index cb25c89da623..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -39,25 +39,6 @@ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr *devattr = &ia->ri_devattr; - struct ib_mr *mr; - - /* Obtain an lkey to use for the regbufs, which are - * protected from remote access. - */ - if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - } else { - mr = ib_get_dma_mr(ia->ri_pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(mr)) { - pr_err("%s: ib_get_dma_mr for failed with %lX\n", - __func__, PTR_ERR(mr)); - return -ENOMEM; - } - ia->ri_dma_lkey = ia->ri_dma_mr->lkey; - ia->ri_dma_mr = mr; - } - return 0; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index d6653f5d0830..5318951b3b53 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -189,11 +189,6 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct ib_device_attr *devattr = &ia->ri_devattr; int depth, delta; - /* Obtain an lkey to use for the regbufs, which are - * protected from remote access. - */ - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, devattr->max_fast_reg_page_list_len); diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index 72cf8b15bbb4..617b76f22154 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -23,7 +23,6 @@ static int physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_mr *mr; /* Obtain an rkey to use for RPC data payloads. @@ -37,15 +36,8 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, __func__, PTR_ERR(mr)); return -ENOMEM; } - ia->ri_dma_mr = mr; - - /* Obtain an lkey to use for regbufs. - */ - if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - else - ia->ri_dma_lkey = ia->ri_dma_mr->lkey; + ia->ri_dma_mr = mr; return 0; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 682996779970..eb081ad05e33 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1252,7 +1252,7 @@ rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) goto out_free; iov->length = size; - iov->lkey = ia->ri_dma_lkey; + iov->lkey = ia->ri_pd->local_dma_lkey; rb->rg_size = size; rb->rg_owner = NULL; return rb; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 02512221b8bc..c09414e6f91b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -65,7 +65,6 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_dma_mr; - u32 ri_dma_lkey; struct completion ri_done; int ri_async_rc; unsigned int ri_max_frmr_depth; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 03ee4d359f6a..ef31b40ad550 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2179,8 +2179,21 @@ unlock: if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - sk_peek_offset_fwd(sk, chunk); + if (skip) { + sk_peek_offset_fwd(sk, chunk); + skip -= chunk; + } + if (UNIXCB(skb).fp) + break; + + last = skb; + last_len = skb->len; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c index 9119ac6a8270..c285a3b8a9f1 100644 --- a/samples/kprobes/jprobe_example.c +++ b/samples/kprobes/jprobe_example.c @@ -1,13 +1,13 @@ /* * Here's a sample kernel module showing the use of jprobes to dump - * the arguments of do_fork(). + * the arguments of _do_fork(). * * For more information on theory of operation of jprobes, see * Documentation/kprobes.txt * * Build and insert the kernel module as done in the kprobe example. * You will see the trace data in /var/log/messages and on the - * console whenever do_fork() is invoked to create a new process. + * console whenever _do_fork() is invoked to create a new process. * (Some messages may be suppressed if syslogd is configured to * eliminate duplicate messages.) */ @@ -17,13 +17,13 @@ #include <linux/kprobes.h> /* - * Jumper probe for do_fork. + * Jumper probe for _do_fork. * Mirror principle enables access to arguments of the probed routine * from the probe handler. */ -/* Proxy routine having the same arguments as actual do_fork() routine */ -static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, +/* Proxy routine having the same arguments as actual _do_fork() routine */ +static long j_do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { @@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, } static struct jprobe my_jprobe = { - .entry = jdo_fork, + .entry = j_do_fork, .kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }, }; diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 366db1a9fb65..727eb21c9c56 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -1,13 +1,13 @@ /* * NOTE: This example is works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a - * stack trace and selected registers when do_fork() is called. + * stack trace and selected registers when _do_fork() is called. * * For more information on theory of operation of kprobes, see * Documentation/kprobes.txt * * You will see the trace data in /var/log/messages and on the console - * whenever do_fork() is invoked to create a new process. + * whenever _do_fork() is invoked to create a new process. */ #include <linux/kernel.h> @@ -16,7 +16,7 @@ /* For each probe you need to allocate a kprobe structure */ static struct kprobe kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }; /* kprobe pre_handler: called just before the probed instruction is executed */ diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 1041b6731598..ebb1d1aed547 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -7,7 +7,7 @@ * * usage: insmod kretprobe_example.ko func=<func_name> * - * If no func_name is specified, do_fork is instrumented + * If no func_name is specified, _do_fork is instrumented * * For more information on theory of operation of kretprobes, see * Documentation/kprobes.txt @@ -25,7 +25,7 @@ #include <linux/limits.h> #include <linux/sched.h> -static char func_name[NAME_MAX] = "do_fork"; +static char func_name[NAME_MAX] = "_do_fork"; module_param_string(func, func_name, NAME_MAX, S_IRUGO); MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" " function's execution time"); diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 6ce5945a0b89..b071bf476fea 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -17,13 +17,9 @@ #include <stdint.h> #include <stdbool.h> #include <string.h> -#include <getopt.h> #include <err.h> -#include <arpa/inet.h> #include <openssl/bio.h> -#include <openssl/evp.h> #include <openssl/pem.h> -#include <openssl/pkcs7.h> #include <openssl/err.h> #include <openssl/engine.h> diff --git a/scripts/sign-file.c b/scripts/sign-file.c index c3899ca4811c..250a7a645033 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -20,13 +20,34 @@ #include <getopt.h> #include <err.h> #include <arpa/inet.h> +#include <openssl/opensslv.h> #include <openssl/bio.h> #include <openssl/evp.h> #include <openssl/pem.h> -#include <openssl/cms.h> #include <openssl/err.h> #include <openssl/engine.h> +/* + * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to + * assume that it's not available and its header file is missing and that we + * should use PKCS#7 instead. Switching to the older PKCS#7 format restricts + * the options we have on specifying the X.509 certificate we want. + * + * Further, older versions of OpenSSL don't support manually adding signers to + * the PKCS#7 message so have to accept that we get a certificate included in + * the signature message. Nor do such older versions of OpenSSL support + * signing with anything other than SHA1 - so we're stuck with that if such is + * the case. + */ +#if OPENSSL_VERSION_NUMBER < 0x10000000L +#define USE_PKCS7 +#endif +#ifndef USE_PKCS7 +#include <openssl/cms.h> +#else +#include <openssl/pkcs7.h> +#endif + struct module_signature { uint8_t algo; /* Public-key crypto algorithm [0] */ uint8_t hash; /* Digest algorithm [0] */ @@ -110,30 +131,42 @@ int main(int argc, char **argv) struct module_signature sig_info = { .id_type = PKEY_ID_PKCS7 }; char *hash_algo = NULL; char *private_key_name, *x509_name, *module_name, *dest_name; - bool save_cms = false, replace_orig; + bool save_sig = false, replace_orig; bool sign_only = false; unsigned char buf[4096]; - unsigned long module_size, cms_size; - unsigned int use_keyid = 0, use_signed_attrs = CMS_NOATTR; + unsigned long module_size, sig_size; + unsigned int use_signed_attrs; const EVP_MD *digest_algo; EVP_PKEY *private_key; +#ifndef USE_PKCS7 CMS_ContentInfo *cms; + unsigned int use_keyid = 0; +#else + PKCS7 *pkcs7; +#endif X509 *x509; BIO *b, *bd = NULL, *bm; int opt, n; - OpenSSL_add_all_algorithms(); ERR_load_crypto_strings(); ERR_clear_error(); key_pass = getenv("KBUILD_SIGN_PIN"); +#ifndef USE_PKCS7 + use_signed_attrs = CMS_NOATTR; +#else + use_signed_attrs = PKCS7_NOATTR; +#endif + do { opt = getopt(argc, argv, "dpk"); switch (opt) { - case 'p': save_cms = true; break; - case 'd': sign_only = true; save_cms = true; break; + case 'p': save_sig = true; break; + case 'd': sign_only = true; save_sig = true; break; +#ifndef USE_PKCS7 case 'k': use_keyid = CMS_USE_KEYID; break; +#endif case -1: break; default: format(); } @@ -157,6 +190,14 @@ int main(int argc, char **argv) replace_orig = true; } +#ifdef USE_PKCS7 + if (strcmp(hash_algo, "sha1") != 0) { + fprintf(stderr, "sign-file: %s only supports SHA1 signing\n", + OPENSSL_VERSION_TEXT); + exit(3); + } +#endif + /* Read the private key and the X.509 cert the PKCS#7 message * will point to. */ @@ -213,7 +254,8 @@ int main(int argc, char **argv) bm = BIO_new_file(module_name, "rb"); ERR(!bm, "%s", module_name); - /* Load the CMS message from the digest buffer. */ +#ifndef USE_PKCS7 + /* Load the signature message from the digest buffer. */ cms = CMS_sign(NULL, NULL, NULL, NULL, CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED | CMS_STREAM); ERR(!cms, "CMS_sign"); @@ -221,17 +263,31 @@ int main(int argc, char **argv) ERR(!CMS_add1_signer(cms, x509, private_key, digest_algo, CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP | use_keyid | use_signed_attrs), - "CMS_sign_add_signer"); + "CMS_add1_signer"); ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0, "CMS_final"); - if (save_cms) { - char *cms_name; +#else + pkcs7 = PKCS7_sign(x509, private_key, NULL, bm, + PKCS7_NOCERTS | PKCS7_BINARY | + PKCS7_DETACHED | use_signed_attrs); + ERR(!pkcs7, "PKCS7_sign"); +#endif - ERR(asprintf(&cms_name, "%s.p7s", module_name) < 0, "asprintf"); - b = BIO_new_file(cms_name, "wb"); - ERR(!b, "%s", cms_name); - ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, "%s", cms_name); + if (save_sig) { + char *sig_file_name; + + ERR(asprintf(&sig_file_name, "%s.p7s", module_name) < 0, + "asprintf"); + b = BIO_new_file(sig_file_name, "wb"); + ERR(!b, "%s", sig_file_name); +#ifndef USE_PKCS7 + ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, + "%s", sig_file_name); +#else + ERR(i2d_PKCS7_bio(b, pkcs7) < 0, + "%s", sig_file_name); +#endif BIO_free(b); } @@ -247,9 +303,13 @@ int main(int argc, char **argv) ERR(n < 0, "%s", module_name); module_size = BIO_number_written(bd); +#ifndef USE_PKCS7 ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name); - cms_size = BIO_number_written(bd) - module_size; - sig_info.sig_len = htonl(cms_size); +#else + ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name); +#endif + sig_size = BIO_number_written(bd) - module_size; + sig_info.sig_len = htonl(sig_size); ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name); ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name); diff --git a/security/keys/gc.c b/security/keys/gc.c index c7952375ac53..39eac1fd5706 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -134,6 +134,10 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); + /* Throw away the key data */ + if (key->type->destroy) + key->type->destroy(key); + security_key_free(key); /* deal with the user's key tracking and quota */ @@ -148,10 +152,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - /* now throw away the key memory */ - if (key->type->destroy) - key->type->destroy(key); - key_user_put(key->user); kfree(key->description); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 2975632d51e2..c8fe6d177119 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -41,6 +41,7 @@ FEATURE_TESTS ?= \ libelf-getphdrnum \ libelf-mmap \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libpython-version \ @@ -51,7 +52,8 @@ FEATURE_TESTS ?= \ timerfd \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid FEATURE_DISPLAY ?= \ dwarf \ @@ -61,13 +63,15 @@ FEATURE_DISPLAY ?= \ libbfd \ libelf \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libslang \ libunwind \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 74ca42093d70..e43a2971bf56 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -19,6 +19,7 @@ FILES= \ test-libelf-getphdrnum.bin \ test-libelf-mmap.bin \ test-libnuma.bin \ + test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ test-libpython-version.bin \ @@ -34,7 +35,8 @@ FILES= \ test-compile-x32.bin \ test-zlib.bin \ test-lzma.bin \ - test-bpf.bin + test-bpf.bin \ + test-get_cpuid.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -87,6 +89,9 @@ test-libelf-getphdrnum.bin: test-libnuma.bin: $(BUILD) -lnuma +test-numa_num_possible_cpus.bin: + $(BUILD) -lnuma + test-libunwind.bin: $(BUILD) -lelf @@ -162,6 +167,9 @@ test-zlib.bin: test-lzma.bin: $(BUILD) -llzma +test-get_cpuid.bin: + $(BUILD) + test-bpf.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 84689a67814a..33cf6f20bd4e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -77,6 +77,10 @@ # include "test-libnuma.c" #undef main +#define main main_test_numa_num_possible_cpus +# include "test-numa_num_possible_cpus.c" +#undef main + #define main main_test_timerfd # include "test-timerfd.c" #undef main @@ -117,6 +121,10 @@ # include "test-lzma.c" #undef main +#define main main_test_get_cpuid +# include "test-get_cpuid.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -136,6 +144,7 @@ int main(int argc, char *argv[]) main_test_libbfd(); main_test_backtrace(); main_test_libnuma(); + main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); @@ -143,6 +152,7 @@ int main(int argc, char *argv[]) main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_lzma(); + main_test_get_cpuid(); return 0; } diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c new file mode 100644 index 000000000000..d7a2c407130d --- /dev/null +++ b/tools/build/feature/test-get_cpuid.c @@ -0,0 +1,7 @@ +#include <cpuid.h> + +int main(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); +} diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c new file mode 100644 index 000000000000..2606e94b0659 --- /dev/null +++ b/tools/build/feature/test-numa_num_possible_cpus.c @@ -0,0 +1,6 @@ +#include <numa.h> + +int main(void) +{ + return numa_num_possible_cpus(); +} diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 4d885934b919..cf42b090477b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3795,7 +3795,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct format_field *field; struct printk_map *printk; long long val, fval; - unsigned long addr; + unsigned long long addr; char *str; unsigned char *hex; int print; @@ -3828,13 +3828,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, */ if (!(field->flags & FIELD_IS_ARRAY) && field->size == pevent->long_size) { - addr = *(unsigned long *)(data + field->offset); + + /* Handle heterogeneous recording and processing + * architectures + * + * CASE I: + * Traces recorded on 32-bit devices (32-bit + * addressing) and processed on 64-bit devices: + * In this case, only 32 bits should be read. + * + * CASE II: + * Traces recorded on 64 bit devices and processed + * on 32-bit devices: + * In this case, 64 bits must be read. + */ + addr = (pevent->long_size == 8) ? + *(unsigned long long *)(data + field->offset) : + (unsigned long long)*(unsigned int *)(data + field->offset); + /* Check if it matches a print format */ printk = find_printk(pevent, addr); if (printk) trace_seq_puts(s, printk->printk); else - trace_seq_printf(s, "%lx", addr); + trace_seq_printf(s, "%llx", addr); break; } str = malloc(len + 1); diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 4a0501d7a3b4..c94c9de3173e 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -364,21 +364,6 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc CYC packets are not requested by default. -no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. - - It stops the driver resetting the byte count to zero whenever - enabling the trace (for example on context switches) which in - turn results in no PSB being forced. However some processors - will produce a PSB anyway. - - In any case, there is still a PSB when the trace is enabled for - the first time. - - no_force_psb can be used to slightly decrease the trace size but - may make it harder for the decoder to recover from errors. - - no_force_psb is not selected by default. - new snapshot option ------------------- diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 827557fc7511..38a08539f4bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -573,9 +573,14 @@ ifndef NO_LIBNUMA msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); NO_LIBNUMA := 1 else - CFLAGS += -DHAVE_LIBNUMA_SUPPORT - EXTLIBS += -lnuma - $(call detected,CONFIG_NUMA) + ifeq ($(feature-numa_num_possible_cpus), 0) + msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); + NO_LIBNUMA := 1 + else + CFLAGS += -DHAVE_LIBNUMA_SUPPORT + EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) + endif endif endif @@ -621,8 +626,13 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - $(call detected,CONFIG_AUXTRACE) - CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + else + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT + endif endif # Among the variables below, these: diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eb5f18b75402..c6f9af78f6f5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -270,12 +270,13 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) int ret = 0; if (module) { - list_for_each_entry(dso, &host_machine->dsos.head, node) { - if (!dso->kernel) - continue; - if (strncmp(dso->short_name + 1, module, - dso->short_name_len - 2) == 0) - goto found; + char module_name[128]; + + snprintf(module_name, sizeof(module_name), "[%s]", module); + map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); + if (map) { + dso = map->dso; + goto found; } pr_debug("Failed to find module %s.\n", module); return -ENOENT; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8a4537ee9bc3..fc3f7c922f99 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1580,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session, file_offset = page_offset; head = data_offset - page_offset; - if (data_size && (data_offset + data_size < file_size)) + if (data_size == 0) + goto out; + + if (data_offset + data_size < file_size) file_size = data_offset + data_size; ui_progress__init(&prog, file_size, "Processing events..."); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 415c359de465..2d065d065b67 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter) memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); } -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +static int check_per_pkg(struct perf_evsel *counter, + struct perf_counts_values *vals, int cpu, bool *skip) { unsigned long *mask = counter->per_pkg_mask; struct cpu_map *cpus = perf_evsel__cpus(counter); @@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) counter->per_pkg_mask = mask; } + /* + * we do not consider an event that has not run as a good + * instance to mark a package as used (skip=1). Otherwise + * we may run into a situation where the first CPU in a package + * is not running anything, yet the second is, and this function + * would mark the package as used after the first CPU and would + * not read the values from the second CPU. + */ + if (!(vals->run && vals->ena)) + return 0; + s = cpu_map__get_socket(cpus, cpu); if (s < 0) return -1; @@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 53bb5f59ec58..475d88d0a1c9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -int elf_getphdrnum(Elf *elf, size_t *dst) +static int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; @@ -1271,8 +1271,6 @@ out_close: static int kcore__init(struct kcore *kcore, char *filename, int elfclass, bool temp) { - GElf_Ehdr *ehdr; - kcore->elfclass = elfclass; if (temp) @@ -1289,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, if (!gelf_newehdr(kcore->elf, elfclass)) goto out_end; - ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); - if (!ehdr) - goto out_end; + memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); return 0; @@ -1348,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, u64 addr, u64 len) { - GElf_Phdr gphdr; - GElf_Phdr *phdr; - - phdr = gelf_getphdr(kcore->elf, idx, &gphdr); - if (!phdr) - return -1; - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_offset = offset; - phdr->p_vaddr = addr; - phdr->p_paddr = 0; - phdr->p_filesz = len; - phdr->p_memsz = len; - phdr->p_align = page_size; - - if (!gelf_update_phdr(kcore->elf, idx, phdr)) + GElf_Phdr phdr = { + .p_type = PT_LOAD, + .p_flags = PF_R | PF_W | PF_X, + .p_offset = offset, + .p_vaddr = addr, + .p_paddr = 0, + .p_filesz = len, + .p_memsz = len, + .p_align = page_size, + }; + + if (!gelf_update_phdr(kcore->elf, idx, &phdr)) return -1; return 0; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7acafb3c5592..c2cd9bf2348b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -709,7 +709,7 @@ bool find_process(const char *name) dir = opendir(procfs__mountpoint()); if (!dir) - return -1; + return false; /* Walk through the directory. */ while (ret && (d = readdir(dir)) != NULL) { diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9655cb49c7cb..bde0ef1a63df 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -71,8 +71,11 @@ unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; +unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; +double base_hz; +double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; unsigned int show_cpu; @@ -502,7 +505,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* %Busy */ if (has_aperf) { if (!skip_c0) - outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); else outp += sprintf(outp, "********"); } @@ -510,7 +513,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Bzy_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -984,6 +987,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -3; if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; + t->aperf = t->aperf * aperf_mperf_multiplier; + t->mperf = t->mperf * aperf_mperf_multiplier; } if (do_smi) { @@ -1149,6 +1154,19 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; + +static void +calculate_tsc_tweak() +{ + unsigned long long msr; + unsigned int base_ratio; + + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + base_hz = base_ratio * bclk * 1000000; + tsc_tweak = base_hz / tsc_hz; +} + static void dump_nhm_platform_info(void) { @@ -1926,8 +1944,6 @@ int has_config_tdp(unsigned int family, unsigned int model) switch (model) { case 0x3A: /* IVB */ - case 0x3E: /* IVB Xeon */ - case 0x3C: /* HSW */ case 0x3F: /* HSX */ case 0x45: /* HSW */ @@ -2543,6 +2559,13 @@ int is_knl(unsigned int family, unsigned int model) return 0; } +unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) +{ + if (is_knl(family, model)) + return 1024; + return 1; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2744,6 +2767,9 @@ void process_cpuid() } } + if (has_aperf) + aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); @@ -2762,6 +2788,9 @@ void process_cpuid() if (debug) dump_cstate_pstate_config_info(); + if (has_skl_msrs(family, model)) + calculate_tsc_tweak(); + return; } @@ -3090,7 +3119,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.7 17-June, 2015" + fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" " - Len Brown <lenb@kernel.org>\n"); } |