From e98b6fed84d0f0155d7b398e0dfeac74c792f2d0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:24:53 -0800 Subject: ceph: fix comment, remove extraneous args The offset/length arguments aren't used. Signed-off-by: Sage Weil --- include/linux/ceph/libceph.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index f22b2e941686..9e76d35670d2 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -227,8 +227,7 @@ extern int ceph_open_session(struct ceph_client *client); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len); + int num_pages); extern void ceph_put_page_vector(struct page **pages, int num_pages); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); -- cgit v1.2.3 From b7495fc2ff941db6a118a93ab8d61149e3f4cef8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:43:12 -0800 Subject: ceph: make page alignment explicit in osd interface We used to infer alignment of IOs within a page based on the file offset, which assumed they matched. This broke with direct IO that was not aligned to pages (e.g., 512-byte aligned IO). We were also trusting the alignment specified in the OSD reply, which could have been adjusted by the server. Explicitly specify the page alignment when setting up OSD IO requests. 
Signed-off-by: Sage Weil --- include/linux/ceph/osd_client.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6c91fb032c39..a1af29648fb5 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -79,6 +79,7 @@ struct ceph_osd_request { struct ceph_file_layout r_file_layout; struct ceph_snap_context *r_snapc; /* snap context for writes */ unsigned r_num_pages; /* size of page array (follows) */ + unsigned r_page_alignment; /* io offset in first page */ struct page **r_pages; /* pages for data payload */ int r_pages_from_pool; int r_own_pages; /* if true, i own page list */ @@ -194,7 +195,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply); + bool use_mempool, int num_reply, + int page_align); static inline void ceph_osdc_get_request(struct ceph_osd_request *req) { @@ -218,7 +220,8 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, u64 off, u64 *plen, u32 truncate_seq, u64 truncate_size, - struct page **pages, int nr_pages); + struct page **pages, int nr_pages, + int page_align); extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, -- cgit v1.2.3 From c5c6b19d4b8f5431fca05f28ae9e141045022149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:40:00 -0800 Subject: ceph: explicitly specify page alignment in network messages The alignment used for reading data into or out of pages used to be taken from the data_off field in the message header. This only worked as long as the page alignment matched the object offset, breaking direct io to non-page aligned offsets. 
Instead, explicitly specify the page alignment next to the page vector in the ceph_msg struct, and use that instead of the message header (which probably shouldn't be trusted). The alloc_msg callback is responsible for filling in this field properly when it sets up the page vector. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 5956d62c3057..a108b425fee2 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -82,6 +82,7 @@ struct ceph_msg { struct ceph_buffer *middle; struct page **pages; /* data payload. NOT OWNER. */ unsigned nr_pages; /* size of page array */ + unsigned page_alignment; /* io offset in first page */ struct ceph_pagelist *pagelist; /* instead of pages */ struct list_head list_head; struct kref kref; -- cgit v1.2.3 From aae6d3ddd8b90f5b2c8d79a2b914d1706d124193 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 17 Sep 2010 15:02:32 -0700 Subject: sched: Use group weight, idle cpu metrics to fix imbalances during idle Currently we consider a sched domain to be well balanced when the imbalance is less than the domain's imbalance_pct. As the number of cores and threads is increasing, current values of imbalance_pct (for example 25% for a NUMA domain) are not enough to detect imbalances like: a) On a WSM-EP system (two sockets, each having 6 cores and 12 logical threads), 24 cpu-hogging tasks get scheduled as 13 on one socket and 11 on another socket, leading to an idle HT cpu. b) On a hypothetical 2 socket NHM-EX system (each socket having 8 cores and 16 logical threads), 16 cpu-hogging tasks can get scheduled as 9 on one socket and 7 on another socket, leaving one core in a socket idle whereas in another socket we have a core having both its HT siblings busy. 
While this issue can be fixed by decreasing the domain's imbalance_pct (by making it a function of number of logical cpus in the domain), it can potentially cause more task migrations across sched groups in an overloaded case. Fix this by using imbalance_pct only during newly_idle and busy load balancing. And during idle load balancing, check if there is an imbalance in number of idle cpu's across the busiest and this sched_group or if the busiest group has more tasks than its weight that the idle cpu in this_group can pull. Reported-by: Nikhil Rao Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra LKML-Reference: <1284760952.2676.11.camel@sbsiddha-MOBL3.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d0036e52a24a..2c79e921a68b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -862,6 +862,7 @@ struct sched_group { * single CPU. */ unsigned int cpu_power, cpu_power_orig; + unsigned int group_weight; /* * The CPUs this group covers. -- cgit v1.2.3 From 3b42a96dc7870c53d20b419185737d3b8f7a7b74 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Mon, 15 Nov 2010 06:01:59 +0000 Subject: net: rtnetlink.h -- only include linux/netdevice.h when used by the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit below added a new helper dev_ingress_queue to cleanly obtain the ingress queue pointer. This necessitated including 'linux/netdevice.h': commit 24824a09e35402b8d58dcc5be803a5ad3937bdba Author: Eric Dumazet Date: Sat Oct 2 06:11:55 2010 +0000 net: dynamic ingress_queue allocation However this include triggers issues for applications in userspace which use the rtnetlink interfaces. 
Commonly this requires they include 'net/if.h' and 'linux/rtnetlink.h' leading to a compiler error as below: In file included from /usr/include/linux/netdevice.h:28:0, from /usr/include/linux/rtnetlink.h:9, from t.c:2: /usr/include/linux/if.h:135:8: error: redefinition of ‘struct ifmap’ /usr/include/net/if.h:112:8: note: originally defined here /usr/include/linux/if.h:169:8: error: redefinition of ‘struct ifreq’ /usr/include/net/if.h:127:8: note: originally defined here /usr/include/linux/if.h:218:8: error: redefinition of ‘struct ifconf’ /usr/include/net/if.h:177:8: note: originally defined here The new helper is only defined for the kernel and protected by __KERNEL__ therefore we can simply pull the include down into the same protected section. Signed-off-by: Andy Whitcroft Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index d42f274418b8..bbad657a3725 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,7 +6,6 @@ #include #include #include -#include /* rtnetlink families. Values up to 127 are reserved for real address * families, values above 128 may be used arbitrarily. @@ -606,6 +605,7 @@ struct tcamsg { #ifdef __KERNEL__ #include +#include static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) { -- cgit v1.2.3 From 8e35f8e7c61c88f9a979a4e6f7f4ffd4c158a88a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Nov 2010 09:11:55 -0400 Subject: NLM: Fix a regression in lockd Nick Bowler reports: There are no unusual messages on the client... but I just logged into the server and I see lots of messages of the following form: nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! 
nfsd: request from insecure port (192.168.8.199:35766)! Bisected to commit 9247685088398cf21bcb513bd2832b4cd42516c4 (SUNRPC: Properly initialize sock_xprt.srcaddr in all cases) Apparently, removing the 'transport->srcaddr.ss_family = family' from xs_create_sock() triggers this due to nlmclnt_lookup_host() incorrectly initialising the srcaddr family to AF_UNSPEC. Reported-by: Nick Bowler Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index a34dea46b629..2dee05e5119a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,6 +43,7 @@ struct nlm_host { struct sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; struct sockaddr_storage h_srcaddr; /* our address (optional) */ + size_t h_srcaddrlen; struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ char *h_name; /* remote hostname */ u32 h_version; /* interface version */ -- cgit v1.2.3 From 5685b971362651ee3d99ff3cc512c3bbd049d34d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 12 Nov 2010 09:23:04 -0500 Subject: nfs: trivial: remove unused nfs_wait_event macro Nothing uses this macro anymore. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bba26684acdc..c66fdb7d6998 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -593,12 +593,6 @@ nfs_fileid_to_ino_t(u64 fileid) return ino; } -#define nfs_wait_event(clnt, wq, condition) \ -({ \ - int __retval = wait_event_killable(wq, condition); \ - __retval; \ -}) - #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) #endif /* __KERNEL__ */ -- cgit v1.2.3 From f281233d3eba15fb225d21ae2e228fd4553d824a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 16 Nov 2010 02:10:29 -0500 Subject: SCSI host lock push-down Move the mid-layer's ->queuecommand() invocation from being locked with the host lock to being unlocked to facilitate speeding up the critical path for drivers who don't need this lock taken anyway. The patch below presents a simple SCSI host lock push-down as an equivalent transformation. No locking or other behavior should change with this patch. All existing bugs and locking orders are preserved. Additionally, add one parameter to queuecommand, struct Scsi_Host * and remove one parameter from queuecommand, void (*done)(struct scsi_cmnd *) Scsi_Host* is a convenient pointer that most host drivers need anyway, and 'done' is redundant to struct scsi_cmnd->scsi_done. Minimal code disturbance was attempted with this change. Most drivers needed only two one-line modifications for their host lock push-down. 
Signed-off-by: Jeff Garzik Acked-by: James Bottomley Signed-off-by: Linus Torvalds --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 15b77b8dc7e1..d947b1231662 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -986,7 +986,7 @@ extern void ata_host_init(struct ata_host *, struct device *, unsigned long, struct ata_port_operations *); extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); -extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); +extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, int cmd, void __user *arg); extern void ata_sas_port_destroy(struct ata_port *); -- cgit v1.2.3 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include <smp_lock.h> The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. 
Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 1 - include/linux/reiserfs_fs.h | 1 - include/linux/tty.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 41cb31f14ee3..8f3f467c57c6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -3,7 +3,6 @@ #include #ifdef CONFIG_PREEMPT -#include #endif #include #include diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 5ca47e59b727..c21072adbfad 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/tty.h b/include/linux/tty.h index c7ea9bc8897c..032d79ff1d9d 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -13,7 +13,6 @@ #include #include #include -#include #include -- cgit v1.2.3 From 7957f0a857754c555e07f58a3fb83ac29501478c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 Nov 2010 14:58:36 -0800 Subject: Fix build failure due to hardirq.h needing smp_lock.h Arnd Bergmann did an automated scripting run to find left-over instances of <linux/smp_lock.h>, and had made it trigger on the normal BKL use of lock_kernel and unlock_kernel (and apparently release_kernel_lock and reacquire_kernel_lock too, used by the scheduler). That resulted in commit 451a3c24b013 ("BKL: remove extraneous #include <smp_lock.h>"). However, hardirq.h was the only remaining user of the old 'kernel_locked()' interface, and Arnd's script hadn't checked for that. So depending on your configuration and what header files had been included, you would get errors like "implicit declaration of function 'kernel_locked'" during the build. The right fix is not to just re-instate the smp_lock.h include - it is to just remove 'kernel_locked()' entirely, since the only use was this one special low-level detail. Just make hardirq.h do it directly. 
In fact this simplifies and clarifies the code, because some trivial analysis makes it clear that hardirq.h only ever used _one_ of the two definitions of kernel_locked(), so we can remove the other one entirely. Reported-by: Zimny Lech Reported-and-acked-by: Randy Dunlap Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 2 +- include/linux/smp_lock.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8f3f467c57c6..bea1612d8f5c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -96,7 +96,7 @@ #define in_nmi() (preempt_count() & NMI_MASK) #if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL) -# define PREEMPT_INATOMIC_BASE kernel_locked() +# define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0) #else # define PREEMPT_INATOMIC_BASE 0 #endif diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 291f721144c2..3a1988202731 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -4,8 +4,6 @@ #ifdef CONFIG_LOCK_KERNEL #include -#define kernel_locked() (current->lock_depth >= 0) - extern int __lockfunc __reacquire_kernel_lock(void); extern void __lockfunc __release_kernel_lock(void); @@ -58,7 +56,6 @@ static inline void cycle_kernel_lock(void) #define lock_kernel() #define unlock_kernel() #define cycle_kernel_lock() do { } while(0) -#define kernel_locked() 1 #endif /* CONFIG_BKL */ #define release_kernel_lock(task) do { } while(0) -- cgit v1.2.3 From 0a5b871ea4c6bfb2723ac2ffc7ef5c32452abb89 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 Nov 2010 18:36:25 -0800 Subject: hardirq.h: remove now-empty #ifdef/#endif pair Commit 451a3c24b013 ("BKL: remove extraneous #include <smp_lock.h>") removed the #include line that was the only thing that was surrounded by the #ifdef/#endif. So now that #ifdef is guarding nothing at all. Just remove it. 
Reported-by: Byeong-ryeol Kim Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index bea1612d8f5c..714da7e5d10c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,8 +2,6 @@ #define LINUX_HARDIRQ_H #include -#ifdef CONFIG_PREEMPT -#endif #include #include #include -- cgit v1.2.3 From ed1d77b18c9f4ff06d5b42c65041aa55a1447053 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Nov 2010 10:56:29 -0800 Subject: hardirq.h: needs sched.h if using BKL This really isn't the right thing to do, and strictly speaking we should have the BKL depth count in the thread info right next to the preempt count. The two really do go together. However, since that would involve a patch to all architectures, and the BKL is finally going away, it's simply not worth the effort to do the RightThing(tm). Just re-instate the include that we used to get accidentally from the smp_lock.h one. This is all fallout from the same old "BKL: remove extraneous #include <smp_lock.h>" commit. 
Reported-by: Ingo Molnar Tested-by: Randy Dunlap Cc: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 714da7e5d10c..32f9fd6619b4 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -94,6 +94,7 @@ #define in_nmi() (preempt_count() & NMI_MASK) #if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL) +# include # define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0) #else # define PREEMPT_INATOMIC_BASE 0 -- cgit v1.2.3 From 93bb41f4f8b89ac8b4d0a734bc59634cb0a29a89 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 19 Nov 2010 21:18:35 -0500 Subject: fs: Do not dispatch FITRIM through separate super_operation There was concern that FITRIM ioctl is not common enough to be included in core vfs ioctl, as Christoph Hellwig pointed out there's no real point in dispatching this out to a separate vector instead of just through ->ioctl. So this commit removes ioctl_fstrim() from vfs ioctl and trim_fs from super_operation structure. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 334d68a17108..eedc00b7b1ee 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1612,7 +1612,6 @@ struct super_operations { ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); - int (*trim_fs) (struct super_block *, struct fstrim_range *); }; /* -- cgit v1.2.3