From 3b5d56b9317fa7b5407dff1aa7b115bf6cdbd494 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:26 -0500 Subject: kvmclock: Add functions to check if the host has stopped the vm When a host stops or suspends a VM it will set a flag to show this. The watchdog will use these functions to determine if a softlockup is real, or the result of a suspended VM. Signed-off-by: Eric B Munson asm-generic changes Acked-by: Arnd Bergmann Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/asm-generic/kvm_para.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/asm-generic/kvm_para.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h new file mode 100644 index 000000000000..05ef7e705939 --- /dev/null +++ b/include/asm-generic/kvm_para.h @@ -0,0 +1,14 @@ +#ifndef _ASM_GENERIC_KVM_PARA_H +#define _ASM_GENERIC_KVM_PARA_H + + +/* + * This function is used by architectures that support kvm to avoid issuing + * false soft lockup messages. + */ +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +#endif -- cgit v1.2.3 From 07bd18d00d5dcf84eb22f8120f47f09c3d8fe27d Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 12 Apr 2012 16:47:55 -0500 Subject: asm/syscall.h: add syscall_get_arch Adds a stub for a function that will return the AUDIT_ARCH_* value appropriate to the supplied task based on the system call convention. For audit's use, the value can generally be hard-coded at the audit-site. However, for other functionality not inlined into syscall entry/exit, this makes that information available. seccomp_filter is the first planned consumer and, as such, the comment indicates a tie to CONFIG_HAVE_ARCH_SECCOMP_FILTER. Suggested-by: Roland McGrath Signed-off-by: Will Drewry Acked-by: Serge Hallyn Acked-by: Eric Paris v18: comment and change reword and rebase. v14: rebase/nochanges v13: rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc v12: rebase on to linux-next v11: fixed improper return type v10: introduced Signed-off-by: James Morris --- include/asm-generic/syscall.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 5c122ae6bfa6..5b09392db673 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -142,4 +142,18 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, unsigned int i, unsigned int n, const unsigned long *args); +/** + * syscall_get_arch - return the AUDIT_ARCH for the current system call + * @task: task of interest, must be in system call entry tracing + * @regs: task_pt_regs() of @task + * + * Returns the AUDIT_ARCH_* based on the system call convention in use. + * + * It's only valid to call this when @task is stopped on entry to a system + * call, due to %TIF_SYSCALL_TRACE, %TIF_SYSCALL_AUDIT, or %TIF_SECCOMP. + * + * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must + * provide an implementation of this. + */ +int syscall_get_arch(struct task_struct *task, struct pt_regs *regs); #endif /* _ASM_SYSCALL_H */ -- cgit v1.2.3 From a0727e8ce513fe6890416da960181ceb10fbfae6 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 12 Apr 2012 16:48:00 -0500 Subject: signal, x86: add SIGSYS info and make it synchronous. This change enables SIGSYS, defines _sigfields._sigsys, and adds x86 (compat) arch support. _sigsys defines fields which allow a signal handler to receive the triggering system call number, the relevant AUDIT_ARCH_* value for that number, and the address of the callsite. SIGSYS is added to the SYNCHRONOUS_MASK because it is desirable for it to have setup_frame() called for it. The goal is to ensure that ucontext_t reflects the machine state from the time-of-syscall and not from another signal handler. The first consumer of SIGSYS would be seccomp filter. In particular, a filter program could specify a new return value, SECCOMP_RET_TRAP, which would result in the system call being denied and the calling thread signaled. This also means that implementing arch-specific support can be dependent upon HAVE_ARCH_SECCOMP_FILTER. Suggested-by: H. Peter Anvin Signed-off-by: Will Drewry Acked-by: Serge Hallyn Reviewed-by: H. Peter Anvin Acked-by: Eric Paris v18: - added acked by, rebase v17: - rebase and reviewed-by addition v14: - rebase/nochanges v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc v12: - reworded changelog (oleg@redhat.com) v11: - fix dropped words in the change description - added fallback copy_siginfo support. - added __ARCH_SIGSYS define to allow stepped arch support. v10: - first version based on suggestion Signed-off-by: James Morris --- include/asm-generic/siginfo.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index 0dd4e87f6fba..31306f55eb02 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -90,9 +90,18 @@ typedef struct siginfo { __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + void __user *_call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } siginfo_t; +/* If the arch shares siginfo, then it has SIGSYS. */ +#define __ARCH_SIGSYS #endif /* @@ -116,6 +125,11 @@ typedef struct siginfo { #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd +#ifdef __ARCH_SIGSYS +#define si_call_addr _sifields._sigsys._call_addr +#define si_syscall _sifields._sigsys._syscall +#define si_arch _sifields._sigsys._arch +#endif #ifdef __KERNEL__ #define __SI_MASK 0xffff0000u @@ -126,6 +140,7 @@ typedef struct siginfo { #define __SI_CHLD (4 << 16) #define __SI_RT (5 << 16) #define __SI_MESGQ (6 << 16) +#define __SI_SYS (7 << 16) #define __SI_CODE(T,N) ((T) | ((N) & 0xffff)) #else #define __SI_KILL 0 @@ -135,6 +150,7 @@ typedef struct siginfo { #define __SI_CHLD 0 #define __SI_RT 0 #define __SI_MESGQ 0 +#define __SI_SYS 0 #define __SI_CODE(T,N) (N) #endif @@ -231,6 +247,12 @@ typedef struct siginfo { #define POLL_HUP (__SI_POLL|6) /* device disconnected */ #define NSIGPOLL 6 +/* + * SIGSYS si_codes + */ +#define SYS_SECCOMP (__SI_SYS|1) /* seccomp triggered */ +#define NSIGSYS 1 + /* * sigevent definitions * -- cgit v1.2.3 From bb6ea4301a1109afdacaee576fedbfcd7152fc86 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 12 Apr 2012 16:48:01 -0500 Subject: seccomp: Add SECCOMP_RET_TRAP Adds a new return value to seccomp filters that triggers a SIGSYS to be delivered with the new SYS_SECCOMP si_code. This allows in-process system call emulation, including just specifying an errno or cleanly dumping core, rather than just dying. Suggested-by: Markus Gutschke Suggested-by: Julien Tinnes Signed-off-by: Will Drewry Acked-by: Eric Paris v18: - acked-by, rebase - don't mention secure_computing_int() anymore v15: - use audit_seccomp/skip - pad out error spacing; clean up switch (indan@nul.nu) v14: - n/a v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc v12: - rebase on to linux-next v11: - clarify the comment (indan@nul.nu) - s/sigtrap/sigsys v10: - use SIGSYS, syscall_get_arch, updates arch/Kconfig note suggested-by (though original suggestion had other behaviors) v9: - changes to SIGILL v8: - clean up based on changes to dependent patches v7: - introduction Signed-off-by: James Morris --- include/asm-generic/siginfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index 31306f55eb02..af5d0350f84c 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -93,7 +93,7 @@ typedef struct siginfo { /* SIGSYS */ struct { - void __user *_call_addr; /* calling insn */ + void __user *_call_addr; /* calling user insn */ int _syscall; /* triggering system call number */ unsigned int _arch; /* AUDIT_ARCH_* of syscall */ } _sigsys; -- cgit v1.2.3 From 1f15d10984c854e077da5aa1a23f901496b49773 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 20 Apr 2012 18:21:46 -0300 Subject: KVM: add kvm_arch_para_features stub to asm-generic/kvm_para.h Needed by kvm_para_has_feature(). Reported-by: Stephen Rothwell Signed-off-by: Marcelo Tosatti --- include/asm-generic/kvm_para.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 05ef7e705939..9a7bbadb688d 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -11,4 +11,9 @@ static inline bool kvm_check_and_clear_guest_paused(void) return false; } +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + #endif -- cgit v1.2.3 From 284f5f9dbac170b054c1e386ef92cbf654e91bba Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 30 Apr 2012 15:21:02 -0600 Subject: PCI: work around Stratus ftServer broken PCIe hierarchy A PCIe downstream port is a P2P bridge. Its secondary interface is a link that should lead only to device 0 (unless ARI is enabled)[1], so we don't probe for non-zero device numbers. Some Stratus ftServer systems have a PCIe downstream port (02:00.0) that leads to both an upstream port (03:00.0) and a downstream port (03:01.0), and 03:01.0 has important devices below it: [0000:02]-+-00.0-[03-3c]--+-00.0-[04-09]--... \-01.0-[0a-0d]--+-[USB] +-[NIC] +-... Previously, we didn't enumerate device 03:01.0, so USB and the network didn't work. This patch adds a DMI quirk to scan all device numbers, not just 0, below a downstream port. Based on a patch by Prarit Bhargava. [1] PCIe spec r3.0, sec 7.3.1 CC: Myron Stowe CC: Don Dutile CC: James Paradis CC: Matthew Wilcox CC: Jesse Barnes CC: Prarit Bhargava Signed-off-by: Bjorn Helgaas --- include/asm-generic/pci-bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/pci-bridge.h b/include/asm-generic/pci-bridge.h index a5b5d5a89a4f..20db2e5a0a69 100644 --- a/include/asm-generic/pci-bridge.h +++ b/include/asm-generic/pci-bridge.h @@ -30,6 +30,12 @@ enum { PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* ... except for domain 0 */ PCI_COMPAT_DOMAIN_0 = 0x00000020, + + /* PCIe downstream ports are bridges that normally lead to only a + * device 0, but if this is set, we scan all possible devices, not + * just device 0. + */ + PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, }; #ifdef CONFIG_PCI -- cgit v1.2.3 From 9fd49328fc2a1cbfea542bcbcf004b5c81dc495b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Apr 2012 22:32:06 -0400 Subject: ftrace: Sort all function addresses, not just per page Instead of just sorting the ip's of the functions per ftrace page, sort the entire list before adding them to the ftrace pages. This will allow the bsearch algorithm to be sped up as it can also sort by pages, not just records within a page. Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8aeadf6b553a..4e2e1cc505ab 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -486,8 +486,8 @@ CPU_DISCARD(init.data) \ MEM_DISCARD(init.data) \ KERNEL_CTORS() \ - *(.init.rodata) \ MCOUNT_REC() \ + *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ DEV_DISCARD(init.rodata) \ -- cgit v1.2.3 From 09d71ff19404b3957fab6de942fb8026ccfd8524 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 May 2012 12:46:46 +0100 Subject: gpiolib: Implement devm_gpio_request_one() Allow drivers to use the modern request and configure idiom together with devres. As with plain gpio_request() and gpio_request_one() we can't implement the old school version in terms of _one() as this would force the explicit selection of a direction in gpio_request() which could break systems if we pick the wrong one. Implementing devm_gpio_request_one() in terms of devm_gpio_request() would needlessly complicate things or lead to duplication from the unmanaged version depending on how it's done. Signed-off-by: Mark Brown Acked-by: Linus Walleij Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 5f52690c3c8f..4ead12332c66 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -179,6 +179,8 @@ extern void gpio_free_array(const struct gpio *array, size_t num); /* bindings for managed devices that want to request gpios */ int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); +int devm_gpio_request_one(struct device *dev, unsigned gpio, + unsigned long flags, const char *label); void devm_gpio_free(struct device *dev, unsigned int gpio); #ifdef CONFIG_GPIO_SYSFS -- cgit v1.2.3 From 3d0f7cf0f3633f92ddeb767eb59cab73963d4dee Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 17 May 2012 13:54:40 -0600 Subject: gpio: Adjust of_xlate API to support multiple GPIO chips This patch changes the of_xlate API to make it possible for multiple gpio_chips to refer to the same device tree node. This is useful for banked GPIO controllers that use multiple gpio_chips for a single device. With this change the core code will try calling of_xlate on each gpio_chip that references the device_node and will return the gpio number for the first one to return 'true'. Tested-by: Roland Stigge Acked-by: Arnd Bergmann Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 4ead12332c66..1ba08f0e49a3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -144,7 +144,7 @@ extern int gpiochip_add(struct gpio_chip *chip); extern int __must_check gpiochip_remove(struct gpio_chip *chip); extern struct gpio_chip *gpiochip_find(const void *data, int (*match)(struct gpio_chip *chip, - const void *data)); + void *data)); /* Always use the library code for GPIO management calls, -- cgit v1.2.3 From 07ce8ec7308ab3fa55fe2861671b157f857fff58 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 18 May 2012 23:01:05 -0600 Subject: gpiolib: Remove 'const' from data argument of gpiochip_find() Commit 3d0f7cf0 "gpio: Adjust of_xlate API to support multiple GPIO chips" changed the api of gpiochip_find to drop const from the data parameter of the match hook, but didn't also drop const from data causing a build warning. Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 1ba08f0e49a3..365ea09ed3b0 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -142,7 +142,7 @@ extern int __must_check gpiochip_reserve(int start, int ngpio); /* add/remove chips */ extern int gpiochip_add(struct gpio_chip *chip); extern int __must_check gpiochip_remove(struct gpio_chip *chip); -extern struct gpio_chip *gpiochip_find(const void *data, +extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.2.3 From bca0fa5f12a6744a2b2e53154af65a51402b3426 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 23 Mar 2012 13:05:14 +0100 Subject: common: add dma_mmap_from_coherent() function Add a common helper for dma-mapping core for mapping a coherent buffer to userspace. Reported-by: Subash Patel Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel --- include/asm-generic/dma-coherent.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffaa0242..abfb2682de7f 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h @@ -3,13 +3,15 @@ #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT /* - * These two functions are only for dma allocator. + * These three functions are only for dma allocator. * Don't use them in device drivers. */ int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); /* * Standard interface */ -- cgit v1.2.3 From c64be2bb1c6eb43c838b2c6d57b074078be208dd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 29 Dec 2011 13:09:51 +0100 Subject: drivers: add Contiguous Memory Allocator The Contiguous Memory Allocator is a set of helper functions for DMA mapping framework that improves allocations of contiguous memory chunks. CMA grabs memory on system boot, marks it with MIGRATE_CMA migrate type and gives back to the system. Kernel is allowed to allocate only movable pages within CMA's managed memory so that it can be used for example for page cache when DMA mapping do not use it. On dma_alloc_from_contiguous() request such pages are migrated out of CMA area to free required contiguous block and fulfill the request. This allows to allocate large contiguous chunks of memory at any time assuming that there is enough free memory available in the system. This code is heavily based on earlier works by Michal Nazarewicz. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Michal Nazarewicz Acked-by: Arnd Bergmann Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- include/asm-generic/dma-contiguous.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/asm-generic/dma-contiguous.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 000000000000..c544356b374b --- /dev/null +++ b/include/asm-generic/dma-contiguous.h @@ -0,0 +1,28 @@ +#ifndef ASM_DMA_CONTIGUOUS_H +#define ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include +#include + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; + if (!dev || !dma_contiguous_default_area) + dma_contiguous_default_area = cma; +} + +#endif +#endif + +#endif -- cgit v1.2.3 From 322728e55aa7834e2fab2786b76df183c4843a12 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 18 May 2012 13:59:39 -0400 Subject: KVM: make asm-generic/kvm_para.h have an ifdef __KERNEL__ block There are two functions in this asm-generic file. Looking at other arch which do not use the generic version, these two fcns are within an #ifdef __KERNEL__ block, so make the generic one consistent with those. Signed-off-by: Paul Gortmaker Signed-off-by: Avi Kivity --- include/asm-generic/kvm_para.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 9a7bbadb688d..5cba37f9eae1 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -1,6 +1,7 @@ #ifndef _ASM_GENERIC_KVM_PARA_H #define _ASM_GENERIC_KVM_PARA_H +#ifdef __KERNEL__ /* * This function is used by architectures that support kvm to avoid issuing @@ -16,4 +17,6 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +#endif /* _KERNEL__ */ + #endif -- cgit v1.2.3 From 73636b1aacb1a07e6fbe0d25e560e69b024a8e25 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 28 Mar 2012 13:59:18 -0400 Subject: arch/tile: allow building Linux with transparent huge pages enabled The change adds some infrastructure for managing tile pmd's more generally, using pte_pmd() and pmd_pte() methods to translate pmd values to and from ptes, since on TILEPro a pmd is really just a nested structure holding a pgd (aka pte). Several existing pmd methods are moved into this framework, and a whole raft of additional pmd accessors are defined that are used by the transparent hugepage framework. The tile PTE now has a "client2" bit. The bit is used to indicate a transparent huge page is in the process of being split into subpages. This change also fixes a generic bug where the return value of the generic pmdp_splitting_flush() was incorrect. Signed-off-by: Chris Metcalf --- include/asm-generic/pgtable.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 125c54e98517..e2768f188f55 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -158,9 +158,8 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern pmd_t pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp); +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PTE_SAME -- cgit v1.2.3 From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 10:43:17 -0700 Subject: word-at-a-time: make the interfaces truly generic This changes the interfaces in to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an *exact* mask of which byte had the first zero. This is run directly *outside* the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly *which* byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. - The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds --- include/asm-generic/word-at-a-time.h | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/asm-generic/word-at-a-time.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h new file mode 100644 index 000000000000..3f21f1b72e45 --- /dev/null +++ b/include/asm-generic/word-at-a-time.h @@ -0,0 +1,52 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This says "generic", but it's actually big-endian only. + * Little-endian can use more efficient versions of these + * interfaces, see for example + * arch/x86/include/asm/word-at-a-time.h + * for those. + */ + +#include + +struct word_at_a_time { + const unsigned long high_bits, low_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) } + +/* Bit set in the bytes that have a zero */ +static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c) +{ + unsigned long mask = (val & c->low_bits) + c->low_bits; + return ~(mask | rhs); +} + +#define create_zero_mask(mask) (mask) + +static inline long find_zero(unsigned long mask) +{ + long byte = 0; +#ifdef CONFIG_64BIT + if (mask >> 32) + mask >>= 32; + else + byte = 4; +#endif + if (mask >> 16) + mask >>= 16; + else + byte += 2; + return (mask >> 8) ? byte : byte + 1; +} + +static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +{ + unsigned long rhs = val | c->low_bits; + *data = rhs; + return (val + c->high_bits) & ~rhs; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.3 From 56457f38f212344fb38b250cfa7e7311c065022f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 28 May 2012 17:35:22 +0300 Subject: KVM: Export asm-generic/kvm_para.h Prevents build failures on non-KVM archs. Tested-by: Geert Uytterhoeven Signed-off-by: Avi Kivity --- include/asm-generic/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 53f91b1ae53a..2c85a0f647b7 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -8,6 +8,7 @@ header-y += int-ll64.h header-y += ioctl.h header-y += ioctls.h header-y += ipcbuf.h +header-y += kvm_para.h header-y += mman-common.h header-y += mman.h header-y += msgbuf.h -- cgit v1.2.3 From 26c191788f18129af0eb32a358cdaea0c7479626 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 29 May 2012 15:06:49 -0700 Subject: mm: pmd_read_atomic: fix 32bit PAE pmd walk vs pmd_populate SMP race condition When holding the mmap_sem for reading, pmd_offset_map_lock should only run on a pmd_t that has been read atomically from the pmdp pointer, otherwise we may read only half of it leading to this crash. PID: 11679 TASK: f06e8000 CPU: 3 COMMAND: "do_race_2_panic" #0 [f06a9dd8] crash_kexec at c049b5ec #1 [f06a9e2c] oops_end at c083d1c2 #2 [f06a9e40] no_context at c0433ded #3 [f06a9e64] bad_area_nosemaphore at c043401a #4 [f06a9e6c] __do_page_fault at c0434493 #5 [f06a9eec] do_page_fault at c083eb45 #6 [f06a9f04] error_code (via page_fault) at c083c5d5 EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP: 00000000 DS: 007b ESI: 9e201000 ES: 007b EDI: 01fb4700 GS: 00e0 CS: 0060 EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246 #7 [f06a9f38] _spin_lock at c083bc14 #8 [f06a9f44] sys_mincore at c0507b7d #9 [f06a9fb0] system_call at c083becd start len EAX: ffffffda EBX: 9e200000 ECX: 00001000 EDX: 6228537f DS: 007b ESI: 00000000 ES: 007b EDI: 003d0f00 SS: 007b ESP: 62285354 EBP: 62285388 GS: 0033 CS: 0073 EIP: 00291416 ERR: 000000da EFLAGS: 00000286 This should be a longstanding bug affecting x86 32bit PAE without THP. Only archs with 64bit large pmd_t and 32bit unsigned long should be affected. With THP enabled the barrier() in pmd_none_or_trans_huge_or_clear_bad() would partly hide the bug when the pmd transition from none to stable, by forcing a re-read of the *pmd in pmd_offset_map_lock, but when THP is enabled a new set of problem arises by the fact could then transition freely in any of the none, pmd_trans_huge or pmd_trans_stable states. So making the barrier in pmd_none_or_trans_huge_or_clear_bad() unconditional isn't good idea and it would be a flakey solution. This should be fully fixed by introducing a pmd_read_atomic that reads the pmd in order with THP disabled, or by reading the pmd atomically with cmpxchg8b with THP enabled. Luckily this new race condition only triggers in the places that must already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix is localized there but this bug is not related to THP. NOTE: this can trigger on x86 32bit systems with PAE enabled with more than 4G of ram, otherwise the high part of the pmd will never risk to be truncated because it would be zero at all times, in turn so hiding the SMP race. This bug was discovered and fully debugged by Ulrich, quote: ---- [..] pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and eax. 496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) 497 { 498 /* depend on compiler for an atomic pmd read */ 499 pmd_t pmdval = *pmd; // edi = pmd pointer 0xc0507a74 : mov 0x8(%esp),%edi ... // edx = PTE page table high address 0xc0507a84 : mov 0x4(%edi),%edx ... // eax = PTE page table low address 0xc0507a8e : mov (%edi),%eax [..] Please note that the PMD is not read atomically. These are two "mov" instructions where the high order bits of the PMD entry are fetched first. Hence, the above machine code is prone to the following race. - The PMD entry {high|low} is 0x0000000000000000. The "mov" at 0xc0507a84 loads 0x00000000 into edx. - A page fault (on another CPU) sneaks in between the two "mov" instructions and instantiates the PMD. - The PMD entry {high|low} is now 0x00000003fda38067. The "mov" at 0xc0507a8e loads 0xfda38067 into eax. ---- Reported-by: Ulrich Obergfell Signed-off-by: Andrea Arcangeli Cc: Mel Gorman Cc: Hugh Dickins Cc: Larry Woodman Cc: Petr Matousek Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2768f188f55..6f2b45a9b6bc 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pmd_read_atomic +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + /* + * Depend on compiler for an atomic pmd read. NOTE: this is + * only going to work, if the pmdval_t isn't larger than + * an unsigned long. + */ + return *pmdp; +} +#endif + /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and @@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd) * undefined so behaving like if the pmd was none is safe (because it * can return none anyway). The compiler level barrier() is critically * important to compute the two checks atomically on the same pmdval. + * + * For 32bit kernels with a 64bit large pmd_t this automatically takes + * care of reading the pmd atomically to avoid SMP race conditions + * against pmd_populate() when the mmap_sem is hold for reading by the + * caller (a special atomic read not done by "gcc" as in the generic + * version above, is also needed when THP is disabled because the page + * fault can populate the pmd from under us). */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - /* depend on compiler for an atomic pmd read */ - pmd_t pmdval = *pmd; + pmd_t pmdval = pmd_read_atomic(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. -- cgit v1.2.3 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- include/asm-generic/posix_types.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd4dde3..fe74fccf18db 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef __kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif -- cgit v1.2.3 From 133fd9f5cda2d86904126f4b9fa4e8f4330c9569 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 31 May 2012 16:26:08 -0700 Subject: vsprintf: further optimize decimal conversion Previous code was using optimizations which were developed to work well even on narrow-word CPUs (by today's standards). But Linux runs only on 32-bit and wider CPUs. We can use that. First: using 32x32->64 multiply and trivial 32-bit shift, we can correctly divide by 10 much larger numbers, and thus we can print groups of 9 digits instead of groups of 5 digits. Next: there are two algorithms to print larger numbers. One is generic: divide by 1000000000 and repeatedly print groups of (up to) 9 digits. It's conceptually simple, but requires an (unsigned long long) / 1000000000 division. Second algorithm splits 64-bit unsigned long long into 16-bit chunks, manipulates them cleverly and generates groups of 4 decimal digits. It so happens that it does NOT require long long division. If long is > 32 bits, division of 64-bit values is relatively easy, and we will use the first algorithm. If long long is > 64 bits (strange architecture with VERY large long long), second algorithm can't be used, and we again use the first one. Else (if long is 32 bits and long long is 64 bits) we use second one. And third: there is a simple optimization which takes fast path not only for zero as was done before, but for all one-digit numbers. In all tested cases new code is faster than old one, in many cases by 30%, in few cases by more than 50% (for example, on x86-32, conversion of 12345678). Code growth is ~0 in 32-bit case and ~130 bytes in 64-bit case. This patch is based upon an original from Michal Nazarewicz. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Michal Nazarewicz Signed-off-by: Denys Vlasenko Cc: Douglas W Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bitsperlong.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 4ae54e07de83..a7b0914348fd 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -28,5 +28,9 @@ #error Inconsistent word size. Check asm/bitsperlong.h #endif +#ifndef BITS_PER_LONG_LONG +#define BITS_PER_LONG_LONG 64 +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_GENERIC_BITS_PER_LONG */ -- cgit v1.2.3