From 1cb7b1e0de6a1f8f071f4a146e3d10f3a662f707 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 31 Mar 2011 13:36:38 +0200 Subject: ACPI EC: remove dead code static void acpi_ec_gpe_query(void *ec_cxt); -> The function is right above this declaration -> not needed. poll_force is also not used, cleaned up in ec.c and its users: compal-laptop and msi-laptop. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- include/linux/acpi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e01293..1deb2a73c2da 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -- cgit v1.2.3 From 31d68ef65c7d49def19c1bae4e01b87d66cf5a56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Feb 2011 11:25:33 -0800 Subject: SUNRPC: Don't wait for full record to receive tcp data Ensure that we immediately read and buffer data from the incoming TCP stream so that we grow the receive window quickly, and don't deadlock on large READ or WRITE requests. Also do some minor exit cleanup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f2..85c50b40759d 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* -- cgit v1.2.3 From a62573dc353b255dbbc8520bfdcb1c8a8c1ada87 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Mar 2011 17:57:07 +0800 Subject: nfsd41: add flag checking for create_session Teach the NFS server to reject invalid create_session flags. Also do some minor formatting adjustments. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b528f6d4b860..8937727e7039 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -570,9 +570,11 @@ struct nfs4_sessionid { }; /* Create Session Flags */ -#define SESSION4_PERSIST 0x001 -#define SESSION4_BACK_CHAN 0x002 -#define SESSION4_RDMA 0x004 +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +#define SESSION4_FLAG_MASK_A 0x007 enum state_protect_how4 { SP4_NONE = 0, -- cgit v1.2.3 From aecb7b64dd9e2512c7a4c7e61dd781415d3dac5a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 24 May 2011 14:04:09 +0530 Subject: dmaengine/dw_dmac: Update maintainer-ship Nobody is currently maintaining dw_dmac. We are using dw_dmac for SPEAr13xx and are currently maintaining it. After discussing with Vinod, sending this patch to update maintainer-ship of dw_dmac. Signed-off-by: Viresh Kumar Acked-by: Havard Skinnemoen Signed-off-by: Vinod Koul --- include/linux/dw_dmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 6998d9376ef9..4bfe0a2f7d50 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -3,6 +3,7 @@ * AVR32 systems.) * * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- cgit v1.2.3 From f29c50419c8d1998edd759f1990c4243a248f469 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:35:33 -0400 Subject: maccess,probe_kernel: Make write/read src const void * The functions probe_kernel_write() and probe_kernel_read() do not modify the src pointer. Allow const pointers to be passed in without the need of a typecast. Acked-by: Mike Frysinger Acked-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/1305824936.1465.4.camel@gandalf.stny.rr.com --- include/linux/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d512d98dfb7d..5ca0951e1855 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -93,8 +93,8 @@ static inline unsigned long __copy_from_user_nocache(void *to, * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_read(void *dst, void *src, size_t size); -extern long __probe_kernel_read(void *dst, void *src, size_t size); +extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -105,7 +105,7 @@ extern long __probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long notrace probe_kernel_write(void *dst, void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3 From 916f676f8dc016103f983c7ec54c18ecdbb6e349 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 25 May 2011 09:53:13 -0400 Subject: x86, efi: Retain boot service code until after switching to virtual mode UEFI stands for "Unified Extensible Firmware Interface", where "Firmware" is an ancient African word meaning "Why do something right when you can do it so wrong that children will weep and brave adults will cower before you", and "UEI" is Celtic for "We missed DOS so we burned it into your ROMs". The UEFI specification provides for runtime services (ie, another way for the operating system to be forced to depend on the firmware) and we rely on these for certain trivial tasks such as setting up the bootloader. But some hardware fails to work if we attempt to use these runtime services from physical mode, and so we have to switch into virtual mode. So far so dreadful. The specification makes it clear that the operating system is free to do whatever it wants with boot services code after ExitBootServices() has been called. SetVirtualAddressMap() can't be called until ExitBootServices() has been. So, obviously, a whole bunch of EFI implementations call into boot services code when we do that. Since we've been charmingly naive and trusted that the specification may be somehow relevant to the real world, we've already stuffed a picture of a penguin or something in that address space. And just to make things more entertaining, we've also marked it non-executable. This patch allocates the boot services regions during EFI init and makes sure that they're executable. Then, after SetVirtualAddressMap(), it discards them and everyone lives happily ever after. Except for the ones who have to work on EFI, who live sad lives haunted by the knowledge that someone's eventually going to write yet another firmware specification. [ hpa: adding this to urgent with a stable tag since it fixes currently-broken hardware. However, I do not know what the dependencies are and so I do not know which -stable versions this may be a candidate for. ] Signed-off-by: Matthew Garrett Link: http://lkml.kernel.org/r/1306331593-28715-1-git-send-email-mjg@redhat.com Signed-off-by: H. Peter Anvin Cc: Tony Luck Cc: --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 33fa1203024e..e376270cd26e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,6 +299,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; /** -- cgit v1.2.3 From 2fc1b6f0d0a719e1e2a30bf076a3a799feaf6af2 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 19 Apr 2011 09:35:28 +0800 Subject: tracing: Add __print_symbolic_u64 to avoid warnings on 32bit machine Filesystem, like Btrfs, has some "ULL" macros, and when these macros are passed to tracepoints'__print_symbolic(), there will be 64->32 truncate WARNINGS during compiling on 32bit box. Signed-off-by: Liu Bo Link: http://lkml.kernel.org/r/4DACE6E0.7000507@cn.fujitsu.com Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b5a550a39a70..59d3ef100eb9 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,6 +16,11 @@ struct trace_print_flags { const char *name; }; +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -23,6 +28,13 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); +#if BITS_PER_LONG == 32 +const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 + *symbol_array); +#endif + const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); -- cgit v1.2.3 From b1cff0ad1062621ae63cb6c5dc4165191fe2e9f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2011 14:27:43 -0400 Subject: ftrace: Add internal recursive checks Witold reported a reboot caused by the selftests of the dynamic function tracer. He sent me a config and I used ktest to do a config_bisect on it (as my config did not cause the crash). It pointed out that the problem config was CONFIG_PROVE_RCU. What happened was that if multiple callbacks are attached to the function tracer, we iterate a list of callbacks. Because the list is managed by synchronize_sched() and preempt_disable, the access to the pointers uses rcu_dereference_raw(). When PROVE_RCU is enabled, the rcu_dereference_raw() calls some debugging functions, which happen to be traced. The tracing of the debug function would then call rcu_dereference_raw() which would then call the debug function and then... well you get the idea. I first wrote two different patches to solve this bug. 1) add a __rcu_dereference_raw() that would not do any checks. 2) add notrace to the offending debug functions. Both of these patches worked. Talking with Paul McKenney on IRC, he suggested to add recursion detection instead. This seemed to be a better solution, so I decided to implement it. As the task_struct already has a trace_recursion to detect recursion in the ring buffer, and that has a very small number it allows, I decided to use that same variable to add flags that can detect the recursion inside the infrastructure of the function tracer. I plan to change it so that the task struct bit can be checked in mcount, but as that requires changes to all archs, I will hold that off to the next merge window. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Frederic Weisbecker Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/1306348063.1465.116.camel@gandalf.stny.rr.com Reported-by: Witold Baryluk Signed-off-by: Steven Rostedt --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8b2d0bec0d8..7b78d9cad471 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1513,7 +1513,7 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; - /* bitmask of trace recursion */ + /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ -- cgit v1.2.3 From 7fe4fc881d1340f17396f52c836e597dadadea42 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 25 May 2011 10:33:17 -0700 Subject: Intel xhci: Add PCI id for Panther Point xHCI host. This adds the PCI ID for the xHCI (USB 3.0) host controller in the Intel Panther Point chipset. It will be used by both the EHCI and xHCI driver in the following patches. Signed-off-by: Sarah Sharp --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 24787b751286..a311008af5e1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2483,6 +2483,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 -- cgit v1.2.3 From aa38572954ade525817fe88c54faebf85e5a61c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 06:53:02 -0400 Subject: fs: pass exact type of data dirties to ->dirty_inode Tell the filesystem if we just updated timestamp (I_DIRTY_SYNC) or anything else, so that the filesystem can track internally if it needs to push out a transaction for fdatasync or not. This is just the prototype change with no user for it yet. I plan to push large XFS changes for the next merge window, and getting this trivial infrastructure in this window would help a lot to avoid tree interdependencies. Also remove incorrect comments that ->dirty_inode can't block. That has been changed a long time ago, and many implementations rely on it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 85c1d302c12e..5e06acf95d0f 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -909,7 +909,7 @@ extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); +extern void ext3_dirty_inode(struct inode *, int); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); extern int ext3_can_truncate(struct inode *inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 241609346dfb..573028df050d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1618,7 +1618,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); -- cgit v1.2.3 From 176e21ee2ec89cae8d45cf1a850ea45a45428fb8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:44 -0400 Subject: SUNRPC: Support for RPC over AF_LOCAL transports TI-RPC introduces the capability of performing RPC over AF_LOCAL sockets. It uses this mainly for registering and unregistering local RPC services securely with the local rpcbind, but we could also conceivably use it as a generic upcall mechanism. This patch provides a client-side only implementation for the moment. We might also consider a server-side implementation to provide AF_LOCAL access to NLM (for statd downcalls, and such like). Autobinding is not supported on kernel AF_LOCAL transports at this time. Kernel ULPs must specify the pathname of the remote endpoint when an AF_LOCAL transport is created. rpcbind supports registering services available via AF_LOCAL, so the kernel could handle it with some adjustment to ->rpcbind and ->set_port. But we don't need this feature for doing upcalls via well-known named sockets. This has not been tested with ULPs that move a substantial amount of data. Thus, I can't attest to how robust the write_space and congestion management logic is. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 1 + include/linux/sunrpc/xprt.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 77e624883393..c68a147939a6 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -145,6 +145,7 @@ typedef __be32 rpc_fraghdr; #define RPCBIND_NETID_TCP "tcp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a0f998c07c65..81cce3b3ee66 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,8 @@ enum xprt_transports { XPRT_TRANSPORT_UDP = IPPROTO_UDP, XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, - XPRT_TRANSPORT_RDMA = 256 + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, }; struct rpc_xprt { -- cgit v1.2.3 From 1e1b6c511d1b23cb7c3b619d82fc7bd9f620565d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 May 2011 15:08:58 +0900 Subject: cpuset: Fix cpuset_cpus_allowed_fallback(), don't update tsk->rt.nr_cpus_allowed The rule is, we have to update tsk->rt.nr_cpus_allowed if we change tsk->cpus_allowed. Otherwise RT scheduler may confuse. Signed-off-by: KOSAKI Motohiro Cc: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4DD4B3FA.5060901@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 2 +- include/linux/sched.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f20eb8f16025..e9eaec522655 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -146,7 +146,7 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) { - cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(p, cpu_possible_mask); return cpumask_any(cpu_active_mask); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8871295a5a..8da84b7bc1b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1841,9 +1841,16 @@ static inline void rcu_copy_process(struct task_struct *p) #endif #ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) +{ +} static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { -- cgit v1.2.3 From 55c2945aa9d4d907ec5ca4f6a4e30ae908d8d30d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 May 2011 05:33:33 -0700 Subject: atomic: Add atomic_or() An atomic_or() function is needed by TREE_RCU to avoid deadlock, so add a generic version. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 96c038e43d66..ee456c79b0e6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,4 +34,17 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) } #endif +#ifndef CONFIG_ARCH_HAS_ATOMIC_OR +static inline void atomic_or(int i, atomic_t *v) +{ + int old; + int new; + + do { + old = atomic_read(v); + new = old | i; + } while (atomic_cmpxchg(v, old, new) != old); +} +#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ + #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3 From 69b4573296469fd3f70cf7044693074980517067 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 28 May 2011 08:25:51 -0700 Subject: Cache xattr security drop check for write v2 Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. We only do the lookup once per file and then if there is no xattr cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in followon patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. add file system helper. Cc: chris.mason@oracle.com Cc: josef@redhat.com Cc: viro@zeniv.linux.org.uk Cc: agruen@linbit.com Cc: Serge E. Hallyn Signed-off-by: Andi Kleen Signed-off-by: Al Viro --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 573028df050d..c55d6b7cd5d6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -237,6 +237,7 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -273,6 +274,7 @@ struct inodes_stat_t { #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) +static inline int is_sxid(mode_t mode) +{ + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); +} + +static inline void inode_has_no_xattr(struct inode *inode) +{ + if (!is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From 333c5ae9948194428fe6c5ef5c088304fc98263b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 11 Feb 2011 12:49:04 -0800 Subject: idle governor: Avoid lock acquisition to read pm_qos before entering idle Thanks to the reviews and comments by Rafael, James, Mark and Andi. Here's version 2 of the patch incorporating your comments and also some update to my previous patch comments. I noticed that before entering idle state, the menu idle governor will look up the current pm_qos target value according to the list of qos requests received. This look up currently needs the acquisition of a lock to access the list of qos requests to find the qos target value, slowing down the entrance into idle state due to contention by multiple cpus to access this list. The contention is severe when there are a lot of cpus waking and going into idle. For example, for a simple workload that has 32 pair of processes ping ponging messages to each other, where 64 cpu cores are active in test system, I see the following profile with 37.82% of cpu cycles spent in contention of pm_qos_lock: - 37.82% swapper [kernel.kallsyms] [k] _raw_spin_lock_irqsave - _raw_spin_lock_irqsave - 95.65% pm_qos_request menu_select cpuidle_idle_call - cpu_idle 99.98% start_secondary A better approach will be to cache the updated pm_qos target value so reading it does not require lock acquisition as in the patch below. With this patch the contention for pm_qos_lock is removed and I saw a 2.2X increase in throughput for my message passing workload. cc: stable@kernel.org Signed-off-by: Tim Chen Acked-by: Andi Kleen Acked-by: James Bottomley Acked-by: mark gross Signed-off-by: Len Brown --- include/linux/pm_qos_params.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 77cbddb3784c..a7d87f911cab 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -16,6 +16,10 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + struct pm_qos_request_list { struct plist_node list; int pm_qos_class; -- cgit v1.2.3 From a43a9d93d40a69eceeb4e4a4c860cc20186d475c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 29 May 2011 12:40:50 +0200 Subject: [S390] mm: fix storage key handling page_get_storage_key() and page_set_storage_key() expect a page address and not its page frame number. This got inconsistent with 2d42552d "[S390] merge page_test_dirty and page_clear_dirty". Result is that we read/write storage keys from random pages and do not have a working dirty bit tracking at all. E.g. SetPageUpdate() doesn't clear the dirty bit of requested pages, which for example ext4 doesn't like very much and panics after a while. Unable to handle kernel paging request at virtual user address (null) Oops: 0004 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 Not tainted 2.6.39-07551-g139f37f-dirty #152 Process flush-94:0 (pid: 1576, task: 000000003eb34538, ksp: 000000003c287b70) Krnl PSW : 0704c00180000000 0000000000316b12 (jbd2_journal_file_inode+0x10e/0x138) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 0000000000000000 0000000000000000 0000000000000000 0700000000000000 0000000000316a62 000000003eb34cd0 0000000000000025 000000003c287b88 0000000000000001 000000003c287a70 000000003f1ec678 000000003f1ec000 0000000000000000 000000003e66ec00 0000000000316a62 000000003c287988 Krnl Code: 0000000000316b04: f0a0000407f4 srp 4(11,%r0),2036,0 0000000000316b0a: b9020022 ltgr %r2,%r2 0000000000316b0e: a7740015 brc 7,316b38 >0000000000316b12: e3d0c0000024 stg %r13,0(%r12) 0000000000316b18: 4120c010 la %r2,16(%r12) 0000000000316b1c: 4130d060 la %r3,96(%r13) 0000000000316b20: e340d0600004 lg %r4,96(%r13) 0000000000316b26: c0e50002b567 brasl %r14,36d5f4 Call Trace: ([<0000000000316a62>] jbd2_journal_file_inode+0x5e/0x138) [<00000000002da13c>] mpage_da_map_and_submit+0x2e8/0x42c [<00000000002daac2>] ext4_da_writepages+0x2da/0x504 [<00000000002597e8>] writeback_single_inode+0xf8/0x268 [<0000000000259f06>] writeback_sb_inodes+0xd2/0x18c [<000000000025a700>] writeback_inodes_wb+0x80/0x168 [<000000000025aa92>] wb_writeback+0x2aa/0x324 [<000000000025abde>] wb_do_writeback+0xd2/0x274 [<000000000025ae3a>] bdi_writeback_thread+0xba/0x1c4 [<00000000001737be>] kthread+0xa6/0xb0 [<000000000056c1da>] kernel_thread_starter+0x6/0xc [<000000000056c1d4>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000316a8a>] jbd2_journal_file_inode+0x86/0x138 Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 79a6700b7162..6081493db68f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, -- cgit v1.2.3 From 4c2593270133708698d4b8cea2dab469479ad13b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 29 May 2011 12:52:55 +0100 Subject: dm table: allow targets to support discards internally Permit a target to support discards regardless of whether or not all its underlying devices do. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 32a4423710f5..4427e0454051 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -191,6 +191,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + + /* + * Set if this target needs to receive discards regardless of + * whether or not its underlying devices have support. + */ + unsigned discards_supported:1; }; /* Each target can link one of these into the table */ -- cgit v1.2.3 From bda8efec5c706a672e0714d341a342e811f0262a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:09 +0100 Subject: dm io: use fixed initial mempool size Replace the arbitrary calculation of an initial io struct mempool size with a constant. The code calculated the number of reserved structures based on the request size and used a "magic" multiplication constant of 4. This patch changes it to reserve a fixed number - itself still chosen quite arbitrarily. Further testing might show if there is a better number to choose. Note that if there is no memory pressure, we can still allocate an arbitrary number of "struct io" structures. One structure is enough to process the whole request. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/dm-io.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 5c9186b93fff..f4b0aa3126f5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -69,8 +69,7 @@ struct dm_io_request { * * Create/destroy may block. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +struct dm_io_client *dm_io_client_create(void); void dm_io_client_destroy(struct dm_io_client *client); /* -- cgit v1.2.3 From 5f43ba2950414dc0abf4ac44c397d88069056746 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:11 +0100 Subject: dm kcopyd: reserve fewer pages Reserve just the minimum of pages needed to process one job. Because we allocate pages from page allocator, we don't need to reserve a large number of pages. The maximum job size is SUB_JOB_SIZE and we calculate the number of reserved pages based on this. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/dm-kcopyd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 5db216311695..312513d4741a 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,8 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); +int dm_kcopyd_client_create(struct dm_kcopyd_client **result); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From fa34ce73072f90ecd90dcc43f29d82e70e5f8676 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:13 +0100 Subject: dm kcopyd: return client directly and not through a pointer Return client directly from dm_kcopyd_client_create, not through a parameter, making it consistent with dm_io_client_create. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/dm-kcopyd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 312513d4741a..298d587e349b 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,7 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(struct dm_kcopyd_client **result); +struct dm_kcopyd_client *dm_kcopyd_client_create(void); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From f7da7a129d57bfe0f74573dc03531c63e1360fae Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 May 2011 14:16:47 -0400 Subject: SUNRPC: introduce xdr_init_decode_pages Initialize xdr_stream and xdr_buf using an array of page pointers and length of buffer. Signed-off-by: Benny Halevy --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fc84b7a19ca3..a20970ef9e4e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); -- cgit v1.2.3 From 38b7c401f6ade50543f246c4bc2c971edf2b19dd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:49:32 +0300 Subject: pnfs-obj: pnfs_osd XDR definitions * Add the pnfs_osd_xdr.h header * defintions the pnfs_osd_layout structure including all it's sub-types and constants. * Declare the pnfs_osd_xdr_decode_layout API + all needed inline helpers. * Define the pnfs_osd_deviceaddr structure and all its subtypes and constants. * Declare API for decoding of a pnfs_osd_deviceaddr from XDR stream. * Define the pnfs_osd_ioerr structure, its substructures and constants. * Declare API for encoding of a pnfs_osd_ioerr into XDR stream. * Define the pnfs_osd_layoutupdate structure and its substructures. * Declare API for encoding of a pnfs_osd_layoutupdate into XDR stream. [Remove server definitions] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- include/linux/pnfs_osd_xdr.h | 345 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 include/linux/pnfs_osd_xdr.h (limited to 'include/linux') diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h new file mode 100644 index 000000000000..76efbdd01622 --- /dev/null +++ b/include/linux/pnfs_osd_xdr.h @@ -0,0 +1,345 @@ +/* + * pNFS-osd on-the-wire data structures + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __PNFS_OSD_XDR_H__ +#define __PNFS_OSD_XDR_H__ + +#include +#include +#include + +#define PNFS_OSD_OSDNAME_MAXSIZE 256 + +/* + * draft-ietf-nfsv4-minorversion-22 + * draft-ietf-nfsv4-pnfs-obj-12 + */ + +/* Layout Structure */ + +enum pnfs_osd_raid_algorithm4 { + PNFS_OSD_RAID_0 = 1, + PNFS_OSD_RAID_4 = 2, + PNFS_OSD_RAID_5 = 3, + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ +}; + +/* struct pnfs_osd_data_map4 { + * uint32_t odm_num_comps; + * length4 odm_stripe_unit; + * uint32_t odm_group_width; + * uint32_t odm_group_depth; + * uint32_t odm_mirror_cnt; + * pnfs_osd_raid_algorithm4 odm_raid_algorithm; + * }; + */ +struct pnfs_osd_data_map { + u32 odm_num_comps; + u64 odm_stripe_unit; + u32 odm_group_width; + u32 odm_group_depth; + u32 odm_mirror_cnt; + u32 odm_raid_algorithm; +}; + +/* struct pnfs_osd_objid4 { + * deviceid4 oid_device_id; + * uint64_t oid_partition_id; + * uint64_t oid_object_id; + * }; + */ +struct pnfs_osd_objid { + struct nfs4_deviceid oid_device_id; + u64 oid_partition_id; + u64 oid_object_id; +}; + +/* For printout. I use: + * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); + * BE style + */ +#define _DEVID_LO(oid_device_id) \ + (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) + +#define _DEVID_HI(oid_device_id) \ + (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) + +static inline int +pnfs_osd_objid_xdr_sz(void) +{ + return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; +} + +enum pnfs_osd_version { + PNFS_OSD_MISSING = 0, + PNFS_OSD_VERSION_1 = 1, + PNFS_OSD_VERSION_2 = 2 +}; + +struct pnfs_osd_opaque_cred { + u32 cred_len; + void *cred; +}; + +enum pnfs_osd_cap_key_sec { + PNFS_OSD_CAP_KEY_SEC_NONE = 0, + PNFS_OSD_CAP_KEY_SEC_SSV = 1, +}; + +/* struct pnfs_osd_object_cred4 { + * pnfs_osd_objid4 oc_object_id; + * pnfs_osd_version4 oc_osd_version; + * pnfs_osd_cap_key_sec4 oc_cap_key_sec; + * opaque oc_capability_key<>; + * opaque oc_capability<>; + * }; + */ +struct pnfs_osd_object_cred { + struct pnfs_osd_objid oc_object_id; + u32 oc_osd_version; + u32 oc_cap_key_sec; + struct pnfs_osd_opaque_cred oc_cap_key; + struct pnfs_osd_opaque_cred oc_cap; +}; + +/* struct pnfs_osd_layout4 { + * pnfs_osd_data_map4 olo_map; + * uint32_t olo_comps_index; + * pnfs_osd_object_cred4 olo_components<>; + * }; + */ +struct pnfs_osd_layout { + struct pnfs_osd_data_map olo_map; + u32 olo_comps_index; + u32 olo_num_comps; + struct pnfs_osd_object_cred *olo_comps; +}; + +/* Device Address */ +enum pnfs_osd_targetid_type { + OBJ_TARGET_ANON = 1, + OBJ_TARGET_SCSI_NAME = 2, + OBJ_TARGET_SCSI_DEVICE_ID = 3, +}; + +/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { + * case OBJ_TARGET_SCSI_NAME: + * string oti_scsi_name<>; + * + * case OBJ_TARGET_SCSI_DEVICE_ID: + * opaque oti_scsi_device_id<>; + * + * default: + * void; + * }; + * + * union pnfs_osd_targetaddr4 switch (bool ota_available) { + * case TRUE: + * netaddr4 ota_netaddr; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_deviceaddr4 { + * pnfs_osd_targetid4 oda_targetid; + * pnfs_osd_targetaddr4 oda_targetaddr; + * uint64_t oda_lun; + * opaque oda_systemid<>; + * pnfs_osd_object_cred4 oda_root_obj_cred; + * opaque oda_osdname<>; + * }; + */ +struct pnfs_osd_targetid { + u32 oti_type; + struct nfs4_string oti_scsi_device_id; +}; + +enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; + +/* struct netaddr4 { + * // see struct rpcb in RFC1833 + * string r_netid<>; // network id + * string r_addr<>; // universal address + * }; + */ +struct pnfs_osd_net_addr { + struct nfs4_string r_netid; + struct nfs4_string r_addr; +}; + +struct pnfs_osd_targetaddr { + u32 ota_available; + struct pnfs_osd_net_addr ota_netaddr; +}; + +enum { + NETWORK_ID_MAX = 16 / 4, + UNIVERSAL_ADDRESS_MAX = 64 / 4, + PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, +}; + +struct pnfs_osd_deviceaddr { + struct pnfs_osd_targetid oda_targetid; + struct pnfs_osd_targetaddr oda_targetaddr; + u8 oda_lun[8]; + struct nfs4_string oda_systemid; + struct pnfs_osd_object_cred oda_root_obj_cred; + struct nfs4_string oda_osdname; +}; + +enum { + ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, + PNFS_OSD_DEVICEADDR_MAX = + PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + + 2 /*oda_lun*/ + + 1 + OSD_SYSTEMID_LEN + + 1 + ODA_OSDNAME_MAX, +}; + +/* LAYOUTCOMMIT: layoutupdate */ + +/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { + * case TRUE: + * int64_t dsu_delta; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_layoutupdate4 { + * pnfs_osd_deltaspaceused4 olu_delta_space_used; + * bool olu_ioerr_flag; + * }; + */ +struct pnfs_osd_layoutupdate { + u32 dsu_valid; + s64 dsu_delta; + u32 olu_ioerr_flag; +}; + +/* LAYOUTRETURN: I/O Rrror Report */ + +enum pnfs_osd_errno { + PNFS_OSD_ERR_EIO = 1, + PNFS_OSD_ERR_NOT_FOUND = 2, + PNFS_OSD_ERR_NO_SPACE = 3, + PNFS_OSD_ERR_BAD_CRED = 4, + PNFS_OSD_ERR_NO_ACCESS = 5, + PNFS_OSD_ERR_UNREACHABLE = 6, + PNFS_OSD_ERR_RESOURCE = 7 +}; + +/* struct pnfs_osd_ioerr4 { + * pnfs_osd_objid4 oer_component; + * length4 oer_comp_offset; + * length4 oer_comp_length; + * bool oer_iswrite; + * pnfs_osd_errno4 oer_errno; + * }; + */ +struct pnfs_osd_ioerr { + struct pnfs_osd_objid oer_component; + u64 oer_comp_offset; + u64 oer_comp_length; + u32 oer_iswrite; + u32 oer_errno; +}; + +/* OSD XDR API */ +/* Layout helpers */ +/* Layout decoding is done in two parts: + * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part + * of the layout. @iter members need not be initialized. + * Returned: + * @layout members are set. (@layout->olo_comps set to NULL). + * + * Zero on success, or negative error if passed xdr is broken. + * + * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns + * false, to decode the next component. + * Returned: + * true if there is more to decode or false if we are done or error. + * + * Example: + * struct pnfs_osd_xdr_decode_layout_iter iter; + * struct pnfs_osd_layout layout; + * struct pnfs_osd_object_cred comp; + * int status; + * + * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + * if (unlikely(status)) + * goto err; + * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { + * // All of @comp strings point to inside the xdr_buffer + * // or scrach buffer. Copy them out to user memory eg. + * copy_single_comp(dest_comp++, &comp); + * } + * if (unlikely(status)) + * goto err; + */ + +struct pnfs_osd_xdr_decode_layout_iter { + unsigned total_comps; + unsigned decoded_comps; +}; + +extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); + +extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err); + +/* Device Info helpers */ + +/* Note: All strings inside @deviceaddr point to space inside @p. + * @p should stay valid while @deviceaddr is in use. + */ +extern void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); + +/* layoutupdate (layout_commit) xdr helpers */ +extern int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou); + +/* osd_ioerror encoding/decoding (layout_return) */ +/* Client */ +extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); +extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); + +#endif /* __PNFS_OSD_XDR_H__ */ -- cgit v1.2.3 From d20581aa4be11407c9eeeb75992df5ef176bba0f Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:03 +0300 Subject: pnfs: support for non-rpc layout drivers Non-rpc layout driver such as for objects and blocks implement their own I/O path and error handling logic. Therefore bypass NFS-based error handling for these layout drivers. [fix lseg ref-count bugs, and null de-refs] [Fall out from: non-rpc layout drivers] Signed-off-by: Boaz Harrosh [get rid of PNFS_USE_RPC_CODE] [get rid of __nfs4_write_done_cb] [revert useless change in nfs4_write_done_cb] Signed-off-by: Benny Halevy --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e371f7df9c4..7c8ff0984a84 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1087,6 +1087,7 @@ struct nfs_read_data { const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; @@ -1112,6 +1113,7 @@ struct nfs_write_data { unsigned long timestamp; /* For lease renewal */ #endif __u64 mds_offset; /* Filelayout dense stripe */ + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From cbe8260369c9f88eafa035cd327dc3e02fad528c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:37 +0300 Subject: pnfs: layoutreturn NFSv4.1 LAYOUTRETURN implementation Currently, does not support layout-type payload encoding. Signed-off-by: Alexandros Batsakis Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Marc Eshel Signed-off-by: Zhang Jingwang [call pnfs_return_layout right before pnfs_destroy_layout] [remove assert_spin_locked from pnfs_clear_lseg_list] [remove wait parameter from the layoutreturn path.] [remove return_type field from nfs4_layoutreturn_args] [remove range from nfs4_layoutreturn_args] [no need to send layoutcommit from _pnfs_return_layout] [don't wait on sync layoutreturn] [fix layout stateid in layoutreturn args] [fixed NULL deref in _pnfs_return_layout] [removed recaim member of nfs4_layoutreturn_args] Signed-off-by: Benny Halevy --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe0ff93..9376eaf26e15 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -562,6 +562,7 @@ enum { NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, + NFSPROC4_CLNT_LAYOUTRETURN, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7c8ff0984a84..5e8444a11adf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data { struct nfs4_layoutcommit_res res; }; +struct nfs4_layoutreturn_args { + __u32 layout_type; + struct inode *inode; + nfs4_stateid stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutreturn_res { + struct nfs4_sequence_res seq_res; + u32 lrs_present; + nfs4_stateid stateid; +}; + +struct nfs4_layoutreturn { + struct nfs4_layoutreturn_args args; + struct nfs4_layoutreturn_res res; + struct rpc_cred *cred; + struct nfs_client *clp; + int rpc_status; +}; + /* * Arguments to the open call. */ -- cgit v1.2.3 From 18ad0a9f2ccd260d37dd6bc5fa04c7819def4c84 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 21:03:56 +0300 Subject: NFSv4.1: change pg_test return type to bool Signed-off-by: Benny Halevy --- include/linux/nfs_page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91af2e49fa3a..3a34e80ae92f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,7 +68,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -- cgit v1.2.3 From 6345d24daf0c1fffe6642081d783cdf653ebaa5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 May 2011 11:32:28 -0700 Subject: mm: Fix boot crash in mm_alloc() Thomas Gleixner reports that we now have a boot crash triggered by CONFIG_CPUMASK_OFFSTACK=y: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] find_next_bit+0x55/0xb0 Call Trace: [] cpumask_any_but+0x2a/0x70 [] flush_tlb_mm+0x2b/0x80 [] pud_populate+0x35/0x50 [] pgd_alloc+0x9a/0xf0 [] mm_init+0xec/0x120 [] mm_alloc+0x53/0xd0 which was introduced by commit de03c72cfce5 ("mm: convert mm->cpu_vm_cpumask into cpumask_var_t"), and is due to wrong ordering of mm_init() vs mm_init_cpumask Thomas wrote a patch to just fix the ordering of initialization, but I hate the new double allocation in the fork path, so I ended up instead doing some more radical surgery to clean it all up. Reported-by: Thomas Gleixner Reported-by: Ingo Molnar Cc: KOSAKI Motohiro Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++++-- include/linux/sched.h | 1 - 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2a78aae78c69..027935c86c68 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -264,6 +264,8 @@ struct mm_struct { struct linux_binfmt *binfmt; + cpumask_var_t cpu_vm_mask_var; + /* Architecture-specific MM context */ mm_context_t context; @@ -311,10 +313,18 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif - - cpumask_var_t cpu_vm_mask_var; +#ifdef CONFIG_CPUMASK_OFFSTACK + struct cpumask cpumask_allocation; +#endif }; +static inline void mm_init_cpumask(struct mm_struct *mm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + mm->cpu_vm_mask_var = &mm->cpumask_allocation; +#endif +} + /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { diff --git a/include/linux/sched.h b/include/linux/sched.h index bcddd0138105..2a8621c4be1e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2194,7 +2194,6 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } -extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -- cgit v1.2.3 From a1b383870a28cfbd1657d4922c0fafc634a62ebd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio: add full three-clause BSD text to headers. It's unclear to me if it's important, but it's obviously causing my technical colleages some headaches and I'd hate such imprecision to slow virtio adoption. I've emailed this to all non-trivial contributors for approval, too. Signed-off-by: Rusty Russell Acked-by: Grant Likely Acked-by: Ryan Harper Acked-by: Anthony Liguori Acked-by: Eric Van Hensbergen Acked-by: john cooper Acked-by: Aneesh Kumar K.V Acked-by: Christian Borntraeger Acked-by: Fernando Luis Vazquez Cao --- include/linux/virtio_9p.h | 25 ++++++++++++++++++++++++- include/linux/virtio_balloon.h | 25 ++++++++++++++++++++++++- include/linux/virtio_blk.h | 25 ++++++++++++++++++++++++- include/linux/virtio_config.h | 25 ++++++++++++++++++++++++- include/linux/virtio_console.h | 26 +++++++++++++++++++++++++- include/linux/virtio_ids.h | 24 +++++++++++++++++++++++- include/linux/virtio_net.h | 25 ++++++++++++++++++++++++- include/linux/virtio_pci.h | 23 +++++++++++++++++++++++ include/linux/virtio_ring.h | 23 +++++++++++++++++++++++ 9 files changed, 214 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b2860..277c4ad44e84 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a2..652dc8bea921 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695ed..e0edb40ca7aa 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd5..39c88c5ad19d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a33..bdf4b0034739 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah , 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d7..85bb0bb66ffc 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce5..136040bba3e3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c622..ea66f3f60d63 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b5..a813e5d460eb 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include -- cgit v1.2.3 From 770b31a85e000b0194974922f238a30ade4246b6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:17 +0300 Subject: virtio: event index interface Define a new feature bit for the guest and host to utilize an event index (like Xen) instead if a flag bit to enable/disable interrupts and kicks. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index a813e5d460eb..c4eef73deb3f 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -52,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -106,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -114,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -130,7 +143,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } #ifdef __KERNEL__ -- cgit v1.2.3 From bf7035bf20563a6cadcb9e870406e7b21daf5e30 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:27 +0300 Subject: virtio ring: inline function to check for events With the new used_event and avail_event and features, both host and guest need similar logic to check whether events are enabled, so it helps to put the common code in the header. Note that Xen has similar logic for notification hold-off in include/xen/interface/io/ring.h with req_event and req_prod corresponding to event_idx + 1 and new_idx respectively. +1 comes from the fact that req_event and req_prod in Xen start at 1, while event index in virtio starts at 0. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4eef73deb3f..4a32cb6da425 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -146,6 +146,20 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + #ifdef __KERNEL__ #include struct virtio_device; -- cgit v1.2.3 From 7ab358c23cbf15cea08129cd722d1ce77433a94d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:14 +0300 Subject: virtio: add api for delayed callbacks Add an API that tells the other side that callbacks should be delayed until a lot of work has been done. Implement using the new event_idx feature. Note: it might seem advantageous to let the drivers ask for a callback after a specific capacity has been reached. However, as a single head can free many entries in the descriptor table, we don't really have a clue about capacity until get_buf is called. The API is the simplest to implement at the moment, we'll see what kind of hints drivers can pass when there's more than one user of the feature. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f74041..710885749605 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. * virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. * Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** -- cgit v1.2.3 From a1706ac4c0201ea0143dc0db0659001b26ceeabb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 May 2011 07:42:51 +0200 Subject: Revert "block: Remove extra discard_alignment from hd_struct." It was not a good idea to start dereferencing disk->queue from the fs sysfs strategy for displaying discard alignment. We ran into first a NULL pointer deref, and after fixing that we sometimes see unvalid disk->queue pointer values. Since discard is the only one of the bunch actually looking into the queue, just revert the change. This reverts commit 23ceb5b7719e9276d4fa72a3ecf94dd396755276. Conflicts: fs/partitions/check.c --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b78956b3c2e7..300d7582006e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,6 +100,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; + unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From ea9d6553b3b3044e7374774cc33bb1b2eee19dd3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 May 2011 13:45:53 +0200 Subject: block: remove unwanted semicolons Since those defined functions require additional semicolon from the caller, they could cause potential syntax errors when used in if-else statements. Signed-off-by: Namhyung Kim Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae9091a68480..1a23722e8878 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1282,8 +1282,8 @@ queue_max_integrity_segments(struct request_queue *q) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0); -#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define blk_integrity_unregister(a) do { } while (0) +#define blk_queue_max_integrity_segments(a, b) do { } while (0) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) -- cgit v1.2.3 From 63da029015b5255915cd6d61f19ffc276ad4635d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 23 May 2011 11:37:09 -0700 Subject: mtd: fix physmap.h warnings Fix build warnings in physmap.h: include/linux/mtd/physmap.h:25: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:25: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/physmap.h:26: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:27: warning: 'struct platform_device' declared inside parameter list Signed-off-by: Randy Dunlap Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c91..e5f21d293c70 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; -- cgit v1.2.3 From 6dd9a7c73761a8a5f5475d5cfdc15368a0f4c06d Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 25 May 2011 19:13:49 +0100 Subject: intel-iommu: Enable super page (2MiB, 1GiB, etc.) support There are no externally-visible changes with this. In the loop in the internal __domain_mapping() function, we simply detect if we are mapping: - size >= 2MiB, and - virtual address aligned to 2MiB, and - physical address aligned to 2MiB, and - on hardware that supports superpages. (and likewise for larger superpages). We automatically use a superpage for such mappings. We never have to worry about *breaking* superpages, since we trust that we will always *unmap* the same range that was mapped. So all we need to do is ensure that dma_pte_clear_range() will also cope with superpages. Adjust pfn_to_dma_pte() to take a superpage 'level' as an argument, so it can return a PTE at the appropriate level rather than always extending the page tables all the way down to level 1. Again, this is simplified by the fact that we should never encounter existing small pages when we're creating a mapping; any old mapping that used the same virtual range will have been entirely removed and its obsolete page tables freed. Provide an 'intel_iommu=sp_off' argument on the command line as a chicken bit. Not that it should ever be required. == The original commit seen in the iommu-2.6.git was Youquan's implementation (and completion) of my own half-baked code which I'd typed into an email. Followed by half a dozen subsequent 'fixes'. I've taken the unusual step of rewriting history and collapsing the original commits in order to keep the main history simpler, and make life easier for the people who are going to have to backport this to older kernels. And also so I can give it a more coherent commit comment which (hopefully) gives a better explanation of what's going on. The original sequence of commits leading to identical code was: Youquan Song (3): intel-iommu: super page support intel-iommu: Fix superpage alignment calculation error intel-iommu: Fix superpage level calculation error in dma_pfn_level_pte() David Woodhouse (4): intel-iommu: Precalculate superpage support for dmar_domain intel-iommu: Fix hardware_largepage_caps() intel-iommu: Fix inappropriate use of superpages in __domain_mapping() intel-iommu: Fix phys_pfn in __domain_mapping for sglist pages Signed-off-by: Youquan Song Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f8522738..bbd8661b3473 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 -- cgit v1.2.3 From 333ba7325213f0a09dfa5ceeddb056d6ad74b3b5 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 29 May 2011 15:53:20 +0300 Subject: cfg80211: don't drop p2p probe responses Commit 0a35d36 ("cfg80211: Use capability info to detect mesh beacons") assumed that probe response with both ESS and IBSS bits cleared means that the frame was sent by a mesh sta. However, these capabilities are also being used in the p2p_find phase, and the mesh-validation broke it. Rename the WLAN_CAPABILITY_IS_MBSS macro, and verify that mesh ies exist before assuming this frame was sent by a mesh sta. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b2eee5879883..bf56b6f78270 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1003,8 +1003,12 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) -/* A mesh STA sets the ESS and IBSS capability bits to zero */ -#define WLAN_CAPABILITY_IS_MBSS(cap) \ +/* + * A mesh STA sets the ESS and IBSS capability bits to zero. + * however, this holds true for p2p probe responses (in the p2p_find + * phase) as well. + */ +#define WLAN_CAPABILITY_IS_STA_BSS(cap) \ (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) -- cgit v1.2.3 From a3bcc23e890a6d49d6763d9eb073d711de2e0469 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 1 Jun 2011 06:49:10 +0000 Subject: af-packet: Add flag to distinguish VID 0 from no-vlan. Currently, user-space cannot determine if a 0 tcp_vlan_tci means there is no VLAN tag or the VLAN ID was zero. Add flag to make this explicit. User-space can check for TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards compatible. Older could would have just checked for tp_vlan_tci, so it will work no worse than before. Signed-off-by: Ben Greear Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 72bfa5a034dd..6d66ce1791a9 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -70,6 +70,7 @@ struct tpacket_auxdata { #define TP_STATUS_COPY 0x2 #define TP_STATUS_LOSING 0x4 #define TP_STATUS_CSUMNOTREADY 0x8 +#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0x0 -- cgit v1.2.3 From 55db4c64eddf37e31279ec15fe90314713bc9cfa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 06:33:24 +0900 Subject: Revert "tty: make receive_buf() return the amout of bytes received" This reverts commit b1c43f82c5aa265442f82dba31ce985ebb7aa71c. It was broken in so many ways, and results in random odd pty issues. It re-introduced the buggy schedule_work() in flush_to_ldisc() that can cause endless work-loops (see commit a5660b41af6a: "tty: fix endless work loop when the buffer fills up"). It also used an "unsigned int" return value fo the ->receive_buf() function, but then made multiple functions return a negative error code, and didn't actually check for the error in the caller. And it didn't actually work at all. BenH bisected down odd tty behavior to it: "It looks like the patch is causing some major malfunctions of the X server for me, possibly related to PTYs. For example, cat'ing a large file in a gnome terminal hangs the kernel for -minutes- in a loop of what looks like flush_to_ldisc/workqueue code, (some ftrace data in the quoted bits further down). ... Some more data: It -looks- like what happens is that the flush_to_ldisc work queue entry constantly re-queues itself (because the PTY is full ?) and the workqueue thread will basically loop forver calling it without ever scheduling, thus starving the consumer process that could have emptied the PTY." which is pretty much exactly the problem we fixed in a5660b41af6a. Milton Miller pointed out the 'unsigned int' issue. Reported-by: Benjamin Herrenschmidt Reported-by: Milton Miller Cc: Stefan Bigler Cc: Toby Gray Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty_ldisc.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 5b07792ccb46..ff7dc08696a8 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,8 +84,7 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. Returns the amount of bytes - * received. + * received with a parity error, etc. * * void (*write_wakeup)(struct tty_struct *); * @@ -141,8 +140,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - unsigned int (*receive_buf)(struct tty_struct *, - const unsigned char *cp, char *fp, int count); + void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v1.2.3