From 34c06254ff82a815fdccdfae7517a06c9b768cee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 00:12:24 -0500 Subject: cgroup: fix cftype->file_offset handling 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") introduced cftype->file_offset so that the handles for per-css file instances can be recorded. These handles then can be used, for example, to generate file modified notifications. Unfortunately, it made the wrong assumption that files are created once for a given css and removed on its destruction. Due to the dependencies among subsystems, a css may be hidden from userland and then later shown again. This is implemented by removing and re-creating the affected files, so the associated kernfs_node for a given cgroup file may change over time. This incorrect assumption led to the corruption of css->files lists. Reimplement cftype->file_offset handling so that cgroup_file->kn is protected by a lock and updated as files are created and destroyed. This also makes keeping them on per-cgroup list unnecessary. Signed-off-by: Tejun Heo Reported-by: James Sedgwick Fixes: 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") Acked-by: Johannes Weiner Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 4 ---- include/linux/cgroup.h | 14 +------------- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 60d44b26276d..869fd4a3d28e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -90,7 +90,6 @@ enum { */ struct cgroup_file { /* do not access any fields from outside cgroup core */ - struct list_head node; /* anchored at css->files */ struct kernfs_node *kn; }; @@ -134,9 +133,6 @@ struct cgroup_subsys_state { */ u64 serial_nr; - /* all cgroup_files associated with this css */ - struct list_head files; - /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 22e3754f89c5..f64083030ad5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); +void cgroup_file_notify(struct cgroup_file *cfile); char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); @@ -516,19 +517,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -/** - * cgroup_file_notify - generate a file modified event for a cgroup_file - * @cfile: target cgroup_file - * - * @cfile must have been obtained by setting cftype->file_offset. - */ -static inline void cgroup_file_notify(struct cgroup_file *cfile) -{ - /* might not have been created due to one of the CFTYPE selector flags */ - if (cfile->kn) - kernfs_notify(cfile->kn); -} - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -- cgit v1.2.3 From 451c2b5caf37b526ae34a1081b71115e1de2d063 Mon Sep 17 00:00:00 2001 From: Aya Mahfouz Date: Wed, 18 Nov 2015 08:36:44 +0200 Subject: net: dns_resolver: convert time_t to time64_t Changes the definition of the pointer _expiry from time_t to time64_t. 
This is to handle the Y2038 problem where time_t will overflow in the year 2038. The change is safe because the kernel subsystems that call dns_query pass NULL. Signed-off-by: Arnd Bergmann Signed-off-by: Aya Mahfouz Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index cc92268af89a..6ac3cad9aef1 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry); + const char *options, char **_result, time64_t *_expiry); #endif /* KERNEL */ -- cgit v1.2.3 From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 12 Nov 2015 11:00:04 +0100 Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks This patch reinforces the lockdep checks performed by perf_cgroup_from_tsk() by passing the perf_event_context whenever possible. It is okay to not hold the RCU read lock when we know we hold the ctx->lock. This patch makes sure this property holds. In some functions, such as perf_cgroup_sched_in(), we do not pass the context because we are sure we are holding the RCU read lock. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: edumazet@google.com Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d841d33bcdc9..f9828a48f16a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -697,9 +697,11 @@ struct perf_cgroup { * if there is no cgroup event for the current CPU context. */ static inline struct perf_cgroup * -perf_cgroup_from_task(struct task_struct *task) +perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) { - return container_of(task_css(task, perf_event_cgrp_id), + return container_of(task_css_check(task, perf_event_cgrp_id, + ctx ? lockdep_is_held(&ctx->lock) + : true), struct perf_cgroup, css); } #endif /* CONFIG_CGROUP_PERF */ -- cgit v1.2.3 From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2015 11:08:45 +0100 Subject: treewide: Remove old email address There were still a number of references to my old Red Hat email address in the kernel source. Remove these while keeping the Red Hat copyright notices intact. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 2 +- include/linux/lockdep.h | 2 +- include/linux/proportions.h | 2 +- include/linux/uprobes.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8dde55974f18..0536524bb9eb 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -5,7 +5,7 @@ * Jump label support * * Copyright (C) 2009-2012 Jason Baron - * Copyright (C) 2011-2012 Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 70400dc7660f..c57e424d914b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -2,7 +2,7 @@ * Runtime locking correctness validator * * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * see Documentation/locking/lockdep-design.txt for more details. */ diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5440f64d2942..21221338ad18 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -1,7 +1,7 @@ /* * FLoating proportions * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * This file contains the public data structure and API definitions. */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 0bdc72f36905..4a29c75b146e 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -21,7 +21,7 @@ * Authors: * Srikar Dronamraju * Jim Keniston - * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include -- cgit v1.2.3 From c9da161c6517ba12154059d3b965c2cbaf16f90f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Nov 2015 21:28:15 +0100 Subject: bpf: fix clearing on persistent program array maps Currently, when having map file descriptors pointing to program arrays, there's still the issue that we unconditionally flush program array contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens when such a file descriptor is released and is independent of the map's refcount. Having this flush independent of the refcount is for a reason: there can be arbitrary complex dependency chains among tail calls, also circular ones (direct or indirect, nesting limit determined during runtime), and we need to make sure that the map drops all references to eBPF programs it holds, so that the map's refcount can eventually drop to zero and initiate its freeing. Btw, a walk of the whole dependency graph would not be possible for various reasons, one being complexity and another one inconsistency, i.e. new programs can be added to parts of the graph at any time, so there's no guaranteed consistent state for the time of such a walk. Now, the program array pinning itself works, but the issue is that each derived file descriptor on close would nevertheless call unconditionally into bpf_fd_array_map_clear(). Instead, keep track of users and postpone this flush until the last reference to a user is dropped. As this only concerns a subset of references (f.e. 
a prog array could hold a program that itself has reference on the prog array holding it, etc), we need to track them separately. Short analysis on the refcounting: on map creation time usercnt will be one, so there's no change in behaviour for bpf_map_release(), if unpinned. If we already fail in map_create(), we are immediately freed, and no file descriptor has been made public yet. In bpf_obj_pin_user(), we need to probe for a possible map in bpf_fd_probe_obj() already with a usercnt reference, so before we drop the reference on the fd with fdput(). Therefore, if actual pinning fails, we need to drop that reference again in bpf_any_put(), otherwise we keep holding it. When last reference drops on the inode, the bpf_any_put() in bpf_evict_inode() will take care of dropping the usercnt again. In the bpf_obj_get_user() case, the bpf_any_get() will grab a reference on the usercnt, still at a time when we have the reference on the path. Should we later on fail to grab a new file descriptor, bpf_any_put() will drop it, otherwise we hold it until bpf_map_release() time. Joint work with Alexei. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de464e6683b6..83d1926c61e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_map { struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; + atomic_t usercnt; }; struct bpf_map_type_list { @@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); +void bpf_map_inc(struct bpf_map *map, bool uref); +void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From ca369d51b3e1649be4a72addd6d6a168cfb3f537 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 13 Nov 2015 16:46:48 -0500 Subject: block/sd: Fix device-imposed transfer length limits Commit 4f258a46346c ("sd: Fix maximum I/O size for BLOCK_PC requests") had the unfortunate side-effect of removing an implicit clamp to BLK_DEF_MAX_SECTORS for REQ_TYPE_FS requests in the block layer code. This caused problems for some SMR drives. Debugging this issue revealed a few problems with the existing infrastructure since the block layer didn't know how to deal with device-imposed limits, only limits set by the I/O controller. - Introduce a new queue limit, max_dev_sectors, which is used by the ULD to signal the maximum sectors for a REQ_TYPE_FS request. - Ensure that max_dev_sectors is correctly stacked and taken into account when overriding max_sectors through sysfs. - Rework sd_read_block_limits() so it saves the max_xfer and opt_xfer values for later processing. - In sd_revalidate() set the queue's max_dev_sectors based on the MAXIMUM TRANSFER LENGTH value in the Block Limits VPD. If this value is not reported, fall back to a cap based on the CDB TRANSFER LENGTH field size. - In sd_revalidate(), use OPTIMAL TRANSFER LENGTH from the Block Limits VPD--if reported and sane--to signal the preferred device transfer size for FS requests. 
Otherwise use BLK_DEF_MAX_SECTORS. - blk_limits_max_hw_sectors() is no longer used and can be removed. Signed-off-by: Martin K. Petersen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=93581 Reviewed-by: Christoph Hellwig Tested-by: sweeneygj@gmx.com Tested-by: Arzeets Tested-by: David Eisner Tested-by: Mario Kicherer Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 38a5ff772a37..9dacb745fa96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -253,6 +253,7 @@ struct queue_limits { unsigned long virt_boundary_mask; unsigned int max_hw_sectors; + unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; unsigned int max_segment_size; @@ -948,7 +949,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 08236c6bb2980561fba657c58fdc76f2865f236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sat, 28 Nov 2015 16:49:27 +0100 Subject: lightnvm: unconverted ppa returned in get_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_bb_tbl function takes ppa as a generic address, which is converted to the ppa device address within the device driver. When the update_bbtbl callback is called from get_bb_tbl, the device specific ppa is used, instead of the generic ppa. Make sure to pass the generic ppa. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3db5552b17d5..c6916aec43b6 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -179,7 +179,7 @@ typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -- cgit v1.2.3 From bf4e6b4e757488dee1b6a581f49c7ac34cd217f8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 26 Nov 2015 08:46:57 +0100 Subject: block: Always check queue limits for cloned requests When a cloned request is retried on other queues it always needs to be checked against the queue limits of that queue. Otherwise the calculations for nr_phys_segments might be wrong, leading to a crash in scsi_init_sgtable(). 
To clarify this the patch renames blk_rq_check_limits() to blk_cloned_rq_check_limits() and removes the symbol export, as the new function should only be used for cloned requests and never exported. Cc: Mike Snitzer Cc: Ewan Milne Cc: Jeff Moyer Signed-off-by: Hannes Reinecke Fixes: e2a60da74 ("block: Clean up special command handling logic") Cc: stable@vger.kernel.org # 3.7+ Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0d2b7927c1f..c06f8eaa42ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -773,7 +773,6 @@ extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); -extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, -- cgit v1.2.3 From 880621c2605b82eb5af91a2c94223df6f5a3fb64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Nov 2015 17:46:09 +0100 Subject: packet: Allow packets with only a header (but no payload) Commit 9c7077622dd91 ("packet: make packet_snd fail on len smaller than l2 header") added validation for the packet size in packet_snd. This change enforces that every packet needs a header (with at least hard_header_len bytes) plus a payload with at least one byte. Before this change the payload was optional. This fixes PPPoE connections which do not have a "Service" or "Host-Uniq" configured (which is violating the spec, but is still widely used in real-world setups). Those are currently failing with the following message: "pppd: packet size is too short (24 <= 24)" Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..3b5d134e945a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,7 +1398,8 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length + * @hard_header_len: Hardware header length, which means that this is the + * minimum size of a packet. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed -- cgit v1.2.3 From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:10 -0800 Subject: net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA This patch is a cleanup to make following patch easier to review. Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA from (struct socket)->flags to a (struct socket_wq)->flags to benefit from RCU protection in sock_wake_async() To ease backports, we rename both constants. Two new helpers, sk_set_bit(int nr, struct sock *sk) and sk_clear_bit(int net, struct sock *sk) are added so that following patch can change their implementation. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/net.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..f514e4dd5521 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,8 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -96,7 +96,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation -- cgit v1.2.3 From ceb5d58b217098a657f3850b7a2640f995032e62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:11 -0800 Subject: net: fix sock_wake_async() rcu protection Dmitry provided a syzkaller (http://github.com/google/syzkaller) triggering a fault in sock_wake_async() when async IO is requested. Said program stressed af_unix sockets, but the issue is generic and should be addressed in core networking stack. The problem is that by the time sock_wake_async() is called, we should not access the @flags field of 'struct socket', as the inode containing this socket might be freed without further notice, and without RCU grace period. We already maintain an RCU protected structure, "struct socket_wq" so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it is the safe route. It also reduces number of cache lines needing dirtying, so might provide a performance improvement anyway. In followup patches, we might move remaining flags (SOCK_NOSPACE, SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and let 'struct socket' being mostly read and let it being shared between cpus. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index f514e4dd5521..0b4ac7da583a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,6 +34,10 @@ struct inode; struct file; struct net; +/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located + * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. + * Eventually all flags will be in sk->sk_wq_flags. 
+ */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 @@ -89,6 +93,7 @@ struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; + unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; @@ -202,7 +207,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, -- cgit v1.2.3 From 64031e3e8a5c042840c5123af695eec89f9e6a24 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 2 Dec 2015 15:44:22 +0800 Subject: ACPI / property: fix compile error for acpi_node_get_property_reference() when CONFIG_ACPI=n In commit 60ba032ed76e ("ACPI / property: Drop size_prop from acpi_dev_get_property_reference()"), the argument "const char *cells_name" was dropped, but forgot to update the stub function in no-ACPI case, it will lead to compile error when CONFIG_ACPI=n, easliy remove "const char *cells_name" to fix it. Fixes: 60ba032ed76e "ACPI / property: Drop size_prop from acpi_dev_get_property_reference()" Reported-by: Kejian Yan Signed-off-by: Hanjun Guo Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 865d948c60e6..9e6f4bb4692f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -782,8 +782,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, } static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, const char *cells_name, - size_t index, struct acpi_reference_args *args) + const char *name, size_t index, + struct acpi_reference_args *args) { return -ENXIO; } -- cgit v1.2.3 From 69030dd1c3671625c6f766af0b64a4bb4409ac3b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 1 Dec 2015 16:52:14 -0800 Subject: cpufreq: use last policy after online for drivers with ->setpolicy For cpufreq drivers which use setpolicy interface, after offline->online the policy is set to default. This can be reproduced by setting the default policy of intel_pstate or longrun to ondemand and then change to "performance". After offline and online, the setpolicy will be called with the policy=ondemand. For drivers using governors this condition is handled by storing last_governor, during offline and restoring during online. The same should be done for drivers using setpolicy interface. Storing last_policy during offline and restoring during online. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ef4c5b1a860f..177c7680c1a8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -77,6 +77,7 @@ struct cpufreq_policy { unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ + unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ -- cgit v1.2.3 From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: ipv6: add complete rcu protection around np->opt This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0ef2a97ccdb5..402753bccafa 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; -- cgit v1.2.3 From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Dec 2015 10:18:21 -0500 Subject: cgroup: fix handling of multi-destination migration from subtree_control enabling Consider the following v2 hierarchy. P0 (+memory) --- P1 (-memory) --- A \- B P0 has memory enabled in its subtree_control while P1 doesn't. If both A and B contain processes, they would belong to the memory css of P1. Now if memory is enabled on P1's subtree_control, memory csses should be created on both A and B and A's processes should be moved to the former and B's processes the latter. IOW, enabling controllers can cause atomic migrations into different csses. The core cgroup migration logic has been updated accordingly but the controller migration methods haven't and still assume that all tasks migrate to a single target css; furthermore, the methods were fed the css in which subtree_control was updated which is the parent of the target csses. pids controller depends on the migration methods to move charges and this made the controller attribute charges to the wrong csses often triggering the following warning by driving a counter negative. WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40() Modules linked in: CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29 ... 
ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000 ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00 ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8 Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] pids_cancel.constprop.6+0x31/0x40 [] pids_can_attach+0x6d/0xf0 [] cgroup_taskset_migrate+0x6c/0x330 [] cgroup_migrate+0xf5/0x190 [] cgroup_attach_task+0x176/0x200 [] __cgroup_procs_write+0x2ad/0x460 [] cgroup_procs_write+0x14/0x20 [] cgroup_file_write+0x35/0x1c0 [] kernfs_fop_write+0x141/0x190 [] __vfs_write+0x28/0xe0 [] vfs_write+0xac/0x1a0 [] SyS_write+0x49/0xb0 [] entry_SYSCALL_64_fastpath+0x12/0x76 This patch fixes the bug by removing @css parameter from the three migration methods, ->can_attach, ->cancel_attach() and ->attach() and updating cgroup_taskset iteration helpers also return the destination css in addition to the task being migrated. All controllers are updated accordingly. * Controllers which don't care whether there are one or multiple target csses can be converted trivially. cpu, io, freezer, perf, netclassid and netprio fall in this category. * cpuset's current implementation assumes that there's single source and destination and thus doesn't support v2 hierarchy already. The only change made by this patchset is how that single destination css is obtained. * memory migration path already doesn't do anything on v2. How the single destination css is obtained is updated and the prep stage of mem_cgroup_can_attach() is reordered to accomodate the change. * pids is the only controller which was affected by this bug. It now correctly handles multi-destination migrations and no longer causes counter underflow from incorrect accounting. 
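As an illustration of the updated interface, a controller's ->attach() callback would now walk the taskset roughly as sketched below. This is not taken from the patch; example_attach(), example_css() and example_charge() are made-up names standing in for a controller's own helpers.

static void example_attach(struct cgroup_taskset *tset)
{
	struct cgroup_subsys_state *dst_css;
	struct task_struct *task;

	/* each task is now paired with the css it is actually migrating to */
	cgroup_taskset_for_each(task, dst_css, tset)
		example_charge(example_css(dst_css), task);
}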
Signed-off-by: Tejun Heo Reported-and-tested-by: Daniel Wagner Cc: Aleksa Sarai --- include/linux/cgroup-defs.h | 9 +++------ include/linux/cgroup.h | 33 ++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 869fd4a3d28e..06b77f9dd3f2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -422,12 +422,9 @@ struct cgroup_subsys { void (*css_reset)(struct cgroup_subsys_state *css); void (*css_e_css_changed)(struct cgroup_subsys_state *css); - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); + int (*can_attach)(struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_taskset *tset); + void (*attach)(struct cgroup_taskset *tset); int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f64083030ad5..cb91b44f5f78 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); -struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); -struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); +struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, struct css_task_iter *it); @@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor + * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple - * processes. When there are multiple tasks in @tset, if a task of a - * process is in @tset, all tasks of the process are in @tset. Also, all - * are guaranteed to share the same source and destination csses. + * processes. + * + * On the v2 hierarchy, there may be tasks from multiple processes and they + * may not share the source or destination csses. + * + * On traditional hierarchies, when there are multiple tasks in @tset, if a + * task of a process is in @tset, all tasks of the process are in @tset. + * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ -#define cgroup_taskset_for_each(task, tset) \ - for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) +#define cgroup_taskset_for_each(task, dst_css, tset) \ + for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ + (task); \ + (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor + * @dst_css: the destination css * @tset: takset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. 
*/ -#define cgroup_taskset_for_each_leader(leader, tset) \ - for ((leader) = cgroup_taskset_first((tset)); (leader); \ - (leader) = cgroup_taskset_next((tset))) \ +#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ + for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ + (leader); \ + (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else -- cgit v1.2.3 From ae5515d66362b9d96cdcfce504567f0b8b7bd83e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 4 Dec 2015 08:38:42 -0700 Subject: Revert: "vfio: Include No-IOMMU mode" Revert commit 033291eccbdb ("vfio: Include No-IOMMU mode") due to lack of a user. This was originally intended to fill a need for the DPDK driver, but uptake has been slow so rather than support an unproven kernel interface revert it and revisit when userspace catches up. Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..ddb440975382 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,9 +44,6 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; -extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); - extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); -- cgit v1.2.3 From ea013a9b205b47b1fcbc72522146fad560af0712 Mon Sep 17 00:00:00 2001 From: Andreas Werner Date: Fri, 4 Dec 2015 18:12:49 +0100 Subject: libata-eh.c: Introduce new ata port flag for controller which lockup on read log page Some controller lockup on a ata_read_log_page. Add new ata port flag ATA_FLAG_NO_LOG_PAGE which can used to blacklist a controller. If this flag is set, any attempt to read a log page returns an error without actually issuing the command. Signed-off-by: Andreas Werner Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 83577f8fd15b..600c1e0626a5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -210,6 +210,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */ -- cgit v1.2.3 From 57b4bd06ff0372fe1e3617889c4b37fbd500364a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:47 +0100 Subject: lightnvm: comments on constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not obvious what NVM_IO_* and NVM_BLK_T_* are used for. Make sure to comment them appropriately as the other constants. 
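For illustration only, a bad-block-table callback (nvm_bb_update_fn) might consume the block-type constants as in the sketch below; example_mark_unusable() is a made-up helper and the snippet is not part of the patch.

static int example_update_bbtbl(struct ppa_addr ppa, int nr_blocks, u8 *blks,
				void *private)
{
	int i;

	for (i = 0; i < nr_blocks; i++) {
		/* anything other than NVM_BLK_T_FREE is unavailable to the target */
		if (blks[i] != NVM_BLK_T_FREE)
			example_mark_unusable(private, ppa, i);
	}
	return 0;
}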
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index c6916aec43b6..935ef3844c05 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -50,9 +50,16 @@ enum { NVM_IO_DUAL_ACCESS = 0x1, NVM_IO_QUAD_ACCESS = 0x2, + /* NAND Access Modes */ NVM_IO_SUSPEND = 0x80, NVM_IO_SLC_MODE = 0x100, NVM_IO_SCRAMBLE_DISABLE = 0x200, + + /* Block Types */ + NVM_BLK_T_FREE = 0x0, + NVM_BLK_T_BAD = 0x1, + NVM_BLK_T_DEV = 0x2, + NVM_BLK_T_HOST = 0x4, }; struct nvm_id_group { -- cgit v1.2.3 From 16f26c3aa9b9c36a9d1092ae3258461d1008481e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:48 +0100 Subject: lightnvm: replace req queue with nvmdev for lld MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where a request queue is passed to the low lever lightnvm device drive integration, the device driver might pass its admin commands through another queue. Instead pass nvm_dev, and let the low level drive the appropriate queue. Reported-by: Christoph Hellwig Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 935ef3844c05..034117b3be5f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -183,17 +183,17 @@ struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); -typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); -typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, +typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); -typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); -typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); -typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t, +typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); -- cgit v1.2.3 From a5e14ba334e202c58e45ef47414ec94c585c1a8c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 28 Oct 2015 13:28:15 +0200 Subject: mlx4: Expose correct max_sge_rd limit mlx4 devices (ConnectX-2, ConnectX-3) has a limitation where rdma read work queue entries cannot exceed 512 bytes. A rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. 
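To show how a consumer would honour the corrected limit, here is a rough sketch that caps the scatter list used for RDMA READ work requests. It assumes the ib_query_device()/ib_device_attr interface of this kernel generation and is not part of the patch; example_cap_read_sge() is a made-up name.

#include <rdma/ib_verbs.h>

static int example_cap_read_sge(struct ib_device *device, int wanted_sge)
{
	struct ib_device_attr attr;
	int err;

	err = ib_query_device(device, &attr);
	if (err)
		return err;

	/* mlx4 now reports 30 here instead of the larger send-queue SGE limit */
	return min_t(int, wanted_sge, attr.max_sge_rd);
}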
Signed-off-by: Sagi Grimberg Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7501626ab529..d3133be12d92 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -426,6 +426,17 @@ enum { MLX4_MAX_FAST_REG_PAGES = 511, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX4_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, -- cgit v1.2.3 From 4c3141e09cfa6460bfcd5e90f73e498db654c917 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 1 Dec 2015 17:24:17 +0100 Subject: of/irq: Export of_irq_find_parent again of_irq_find_parent was made static since it had no users outside of of_irq.c. Export it again since we are going to use it again. Signed-off-by: Carlo Caione [robh: move of_irq_find_parent to correct ifdef section] Signed-off-by: Rob Herring --- include/linux/of_irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 039f2eec49ce..f648acf27ed7 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); @@ -70,6 +71,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline void *of_irq_find_parent(struct device_node *child) +{ + return NULL; +} + static inline struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token) -- cgit v1.2.3 From eaddb5725357e9f05ffe5d271630f8197d089da4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 9 Dec 2015 09:11:10 -0600 Subject: of/irq: move of_msi_map_rid declaration to the correct ifdef section In checking fixes for of_irq_find_parent declaration location, I found that of_msi_map_rid is also wrong. of_msi_map_rid is not implemented for Sparc, so it should not be in the Sparc specific section of the header. Move it to just depend on OF_IRQ. 
Cc: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of_irq.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index f648acf27ed7..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -53,6 +53,7 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev, extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid); extern void of_msi_configure(struct device *dev, struct device_node *np); +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else static inline int of_irq_count(struct device_node *dev) { @@ -90,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } +static inline u32 of_msi_map_rid(struct device *dev, + struct device_node *msi_np, u32 rid_in) +{ + return rid_in; +} #endif #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) @@ -99,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) * so declare it here regardless of the CONFIG_OF_IRQ setting. */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else /* !CONFIG_OF && !CONFIG_SPARC */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -107,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, { return 0; } - -static inline u32 of_msi_map_rid(struct device *dev, - struct device_node *msi_np, u32 rid_in) -{ - return rid_in; -} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From d7e35dfa2531b53618b9e6edcd8752ce988ac555 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 3 Dec 2015 22:04:01 -0500 Subject: bitops.h: correctly handle rol32 with 0 byte shift ROL on a 32 bit integer with a shift of 32 or more is undefined and the result is arch-dependent. Avoid this by handling the trivial case of roling by 0 correctly. The trivial solution of checking if shift is 0 breaks gcc's detection of this code as a ROL instruction, which is unacceptable. This bug was reported and fixed in GCC (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57157): The standard rotate idiom, (x << n) | (x >> (32 - n)) is recognized by gcc (for concreteness, I discuss only the case that x is an uint32_t here). However, this is portable C only for n in the range 0 < n < 32. For n == 0, we get x >> 32 which gives undefined behaviour according to the C standard (6.5.7, Bitwise shift operators). To portably support n == 0, one has to write the rotate as something like (x << n) | (x >> ((-n) & 31)) And this is apparently not recognized by gcc. Note that this is broken on older GCCs and will result in slower ROL. 
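A short illustration of why the zero-shift case matters (not part of the patch):

#include <linux/bitops.h>

/*
 * With the fixed idiom a zero shift is well defined, since (-0) & 31 == 0:
 *
 *	rol32(x, 0) == (x << 0) | (x >> 0) == x
 *
 * whereas the old form evaluated x >> 32, which the C standard leaves
 * undefined for a 32-bit type.
 */
static __u32 rol32_zero_shift_demo(__u32 x)
{
	return rol32(x, 0);	/* now simply x on every compiler */
}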
Acked-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2b8ed123ad36..defeaac0745f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift) */ static inline __u32 rol32(__u32 word, unsigned int shift) { - return (word << shift) | (word >> (32 - shift)); + return (word << shift) | (word >> ((-shift) & 31)); } /** -- cgit v1.2.3 From 059393c5bdd1420bdf1bed2972f33196dff263ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 7 Dec 2015 10:11:11 +0000 Subject: irqchip/gic-v3: Add missing struct device_node declaration When the GICv3 header file is used in a C file that doesn't include any of the OF stuff, we end up with a bunch of ugly warnings. Let's keep GCC quiet by adding a forward declaration. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Link: http://lkml.kernel.org/r/1449483072-17694-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c9ae0c6ec050..d5d798b35c1f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -330,6 +330,7 @@ struct rdists { }; struct irq_domain; +struct device_node; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, struct irq_domain *domain); -- cgit v1.2.3 From ad87e03213b552a5c33d5e1e7a19a73768397010 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 10 Dec 2015 15:27:21 -0500 Subject: USB: add quirk for devices with broken LPM Some USB device / host controller combinations seem to have problems with Link Power Management. For example, Steinar found that his xHCI controller wouldn't handle bandwidth calculations correctly for two video cards simultaneously when LPM was enabled, even though the bus had plenty of bandwidth available. This patch introduces a new quirk flag for devices that should remain disabled for LPM, and creates quirk entries for Steinar's devices. Signed-off-by: Alan Stern Reported-by: Steinar H. Gunderson Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 9948c874e3f1..1d0043dc34e4 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -47,4 +47,7 @@ /* device generates spurious wakeup, ignore remote wakeup capability */ #define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9) +/* device can't handle Link Power Management */ +#define USB_QUIRK_NO_LPM BIT(10) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 98e89cf02aed11166698dd53c6f14865613babb3 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 11 Dec 2015 13:40:43 -0800 Subject: mm: kmemleak: mark kmemleak_init prototype as __init The kmemleak_init() definition in mm/kmemleak.c is marked __init but its prototype in include/linux/kmemleak.h is marked __ref since commit a6186d89c913 ("kmemleak: Mark the early log buffer as __initdata"). This causes a section mismatch which is reported as a warning when building with clang -Wsection, because kmemleak_init() is declared in section .ref.text but defined in .init.text. 
Fix this by marking kmemleak_init() prototype __init. Signed-off-by: Nicolas Iooss Signed-off-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index d0a1f99e24e3..4894c6888bc6 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -25,7 +25,7 @@ #ifdef CONFIG_DEBUG_KMEMLEAK -extern void kmemleak_init(void) __ref; +extern void kmemleak_init(void) __init; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, -- cgit v1.2.3 From 86fffe4a61dd972d5a4e23260d530be6da02f614 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Dec 2015 13:40:46 -0800 Subject: kernel: remove stop_machine() Kconfig dependency Currently the full stop_machine() routine is only enabled on SMP if module unloading is enabled, or if the CPUs are hotpluggable. This leads to configurations where stop_machine() is broken as it will then only run the callback on the local CPU with irqs disabled, and not stop the other CPUs or run the callback on them. For example, this breaks MTRR setup on x86 in certain configs since ea8596bb2d8d379 ("kprobes/x86: Remove unused text_poke_smp() and text_poke_smp_batch() functions") as the MTRR is only established on the boot CPU. This patch removes the Kconfig option for STOP_MACHINE and uses the SMP and HOTPLUG_CPU config options to compile the correct stop_machine() for the architecture, removing the false dependency on MODULE_UNLOAD in the process. Link: https://lkml.org/lkml/2014/10/8/124 References: https://bugs.freedesktop.org/show_bug.cgi?id=84794 Signed-off-by: Chris Wilson Acked-by: Ingo Molnar Cc: "Paul E. McKenney" Cc: Pranith Kumar Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: H. Peter Anvin Cc: Tejun Heo Cc: Iulia Manda Cc: Andy Lutomirski Cc: Rusty Russell Cc: Peter Zijlstra Cc: Chuck Ebbert Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0adedca24c5b..0e1b1540597a 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * grabbing every spinlock (and more). So the "read" side to such a * lock is anything which disables preemption. 
*/ -#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) +#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) /** * stop_machine: freeze the machine on all CPUs and run this function @@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) @@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, return stop_machine(fn, data, cpus); } -#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 13 Dec 2015 22:11:16 +0100 Subject: sched/wait: Fix the signal handling fix Jan Stancek reported that I wrecked things for him by fixing things for Vladimir :/ His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which should not be possible, however my previous patch made this possible by unconditionally checking signal_pending(). We cannot use current->state as was done previously, because the instruction after the store to that variable it can be changed. We must instead pass the initial state along and use that. Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers") Reported-by: Jan Stancek Reported-by: Chris Mason Tested-by: Jan Stancek Tested-by: Vladimir Murzin Tested-by: Chris Mason Reviewed-by: Paul Turner Cc: Ingo Molnar Cc: tglx@linutronix.de Cc: Oleg Nesterov Cc: hpa@zytor.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- include/linux/wait.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..513b36f04dfd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) list_del(&old->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *); +typedef int wait_bit_action_f(struct wait_bit_key *, int mode); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); @@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); } while (0) -extern int bit_wait(struct wait_bit_key *); -extern int bit_wait_io(struct wait_bit_key *); -extern int bit_wait_timeout(struct wait_bit_key *); -extern int bit_wait_io_timeout(struct wait_bit_key *); +extern int bit_wait(struct wait_bit_key *, int); +extern int bit_wait_io(struct wait_bit_key *, int); +extern int bit_wait_timeout(struct wait_bit_key *, int); +extern int bit_wait_io_timeout(struct wait_bit_key *, int); /** * wait_on_bit - wait for a bit to be cleared -- cgit v1.2.3
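For context, an action callback under the new wait_bit_action_f signature would use the extra argument roughly as sketched below. The corresponding kernel/sched/wait.c changes are outside this include/linux digest, so treat this as an assumption-laden sketch rather than the actual implementation; example_bit_wait() is a made-up name.

#include <linux/sched.h>
#include <linux/wait.h>

static int example_bit_wait(struct wait_bit_key *key, int mode)
{
	/* fail the wait only when the caller actually sleeps interruptibly */
	if (signal_pending_state(mode, current))
		return -EINTR;
	schedule();
	return 0;
}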