From 34c06254ff82a815fdccdfae7517a06c9b768cee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 00:12:24 -0500 Subject: cgroup: fix cftype->file_offset handling 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") introduced cftype->file_offset so that the handles for per-css file instances can be recorded. These handles then can be used, for example, to generate file modified notifications. Unfortunately, it made the wrong assumption that files are created once for a given css and removed on its destruction. Due to the dependencies among subsystems, a css may be hidden from userland and then later shown again. This is implemented by removing and re-creating the affected files, so the associated kernfs_node for a given cgroup file may change over time. This incorrect assumption led to the corruption of css->files lists. Reimplement cftype->file_offset handling so that cgroup_file->kn is protected by a lock and updated as files are created and destroyed. This also makes keeping them on per-cgroup list unnecessary. Signed-off-by: Tejun Heo Reported-by: James Sedgwick Fixes: 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") Acked-by: Johannes Weiner Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 4 ---- include/linux/cgroup.h | 14 +------------- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 60d44b26276d..869fd4a3d28e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -90,7 +90,6 @@ enum { */ struct cgroup_file { /* do not access any fields from outside cgroup core */ - struct list_head node; /* anchored at css->files */ struct kernfs_node *kn; }; @@ -134,9 +133,6 @@ struct cgroup_subsys_state { */ u64 serial_nr; - /* all cgroup_files associated with this css */ - struct list_head files; - /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 22e3754f89c5..f64083030ad5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); +void cgroup_file_notify(struct cgroup_file *cfile); char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); @@ -516,19 +517,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -/** - * cgroup_file_notify - generate a file modified event for a cgroup_file - * @cfile: target cgroup_file - * - * @cfile must have been obtained by setting cftype->file_offset. - */ -static inline void cgroup_file_notify(struct cgroup_file *cfile) -{ - /* might not have been created due to one of the CFTYPE selector flags */ - if (cfile->kn) - kernfs_notify(cfile->kn); -} - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -- cgit v1.2.3 From 451c2b5caf37b526ae34a1081b71115e1de2d063 Mon Sep 17 00:00:00 2001 From: Aya Mahfouz Date: Wed, 18 Nov 2015 08:36:44 +0200 Subject: net: dns_resolver: convert time_t to time64_t Changes the definition of the pointer _expiry from time_t to time64_t. 
This is to handle the Y2038 problem where time_t will overflow in the year 2038. The change is safe because the kernel subsystems that call dns_query pass NULL. Signed-off-by: Arnd Bergmann Signed-off-by: Aya Mahfouz Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index cc92268af89a..6ac3cad9aef1 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry); + const char *options, char **_result, time64_t *_expiry); #endif /* KERNEL */ -- cgit v1.2.3 From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 12 Nov 2015 11:00:04 +0100 Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks This patch reinforces the lockdep checks performed by perf_cgroup_from_tsk() by passing the perf_event_context whenever possible. It is okay to not hold the RCU read lock when we know we hold the ctx->lock. This patch makes sure this property holds. In some functions, such as perf_cgroup_sched_in(), we do not pass the context because we are sure we are holding the RCU read lock. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: edumazet@google.com Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d841d33bcdc9..f9828a48f16a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -697,9 +697,11 @@ struct perf_cgroup { * if there is no cgroup event for the current CPU context. */ static inline struct perf_cgroup * -perf_cgroup_from_task(struct task_struct *task) +perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) { - return container_of(task_css(task, perf_event_cgrp_id), + return container_of(task_css_check(task, perf_event_cgrp_id, + ctx ? lockdep_is_held(&ctx->lock) + : true), struct perf_cgroup, css); } #endif /* CONFIG_CGROUP_PERF */ -- cgit v1.2.3 From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2015 11:08:45 +0100 Subject: treewide: Remove old email address There were still a number of references to my old Red Hat email address in the kernel source. Remove these while keeping the Red Hat copyright notices intact. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 2 +- include/linux/lockdep.h | 2 +- include/linux/proportions.h | 2 +- include/linux/uprobes.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8dde55974f18..0536524bb9eb 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -5,7 +5,7 @@ * Jump label support * * Copyright (C) 2009-2012 Jason Baron - * Copyright (C) 2011-2012 Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 70400dc7660f..c57e424d914b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -2,7 +2,7 @@ * Runtime locking correctness validator * * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * see Documentation/locking/lockdep-design.txt for more details. */ diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5440f64d2942..21221338ad18 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -1,7 +1,7 @@ /* * FLoating proportions * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * This file contains the public data structure and API definitions. */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 0bdc72f36905..4a29c75b146e 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -21,7 +21,7 @@ * Authors: * Srikar Dronamraju * Jim Keniston - * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include -- cgit v1.2.3 From c9da161c6517ba12154059d3b965c2cbaf16f90f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Nov 2015 21:28:15 +0100 Subject: bpf: fix clearing on persistent program array maps Currently, when having map file descriptors pointing to program arrays, there's still the issue that we unconditionally flush program array contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens when such a file descriptor is released and is independent of the map's refcount. Having this flush independent of the refcount is for a reason: there can be arbitrary complex dependency chains among tail calls, also circular ones (direct or indirect, nesting limit determined during runtime), and we need to make sure that the map drops all references to eBPF programs it holds, so that the map's refcount can eventually drop to zero and initiate its freeing. Btw, a walk of the whole dependency graph would not be possible for various reasons, one being complexity and another one inconsistency, i.e. new programs can be added to parts of the graph at any time, so there's no guaranteed consistent state for the time of such a walk. Now, the program array pinning itself works, but the issue is that each derived file descriptor on close would nevertheless call unconditionally into bpf_fd_array_map_clear(). Instead, keep track of users and postpone this flush until the last reference to a user is dropped. As this only concerns a subset of references (f.e. 
a prog array could hold a program that itself has reference on the prog array holding it, etc), we need to track them separately. Short analysis on the refcounting: on map creation time usercnt will be one, so there's no change in behaviour for bpf_map_release(), if unpinned. If we already fail in map_create(), we are immediately freed, and no file descriptor has been made public yet. In bpf_obj_pin_user(), we need to probe for a possible map in bpf_fd_probe_obj() already with a usercnt reference, so before we drop the reference on the fd with fdput(). Therefore, if actual pinning fails, we need to drop that reference again in bpf_any_put(), otherwise we keep holding it. When last reference drops on the inode, the bpf_any_put() in bpf_evict_inode() will take care of dropping the usercnt again. In the bpf_obj_get_user() case, the bpf_any_get() will grab a reference on the usercnt, still at a time when we have the reference on the path. Should we later on fail to grab a new file descriptor, bpf_any_put() will drop it, otherwise we hold it until bpf_map_release() time. Joint work with Alexei. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de464e6683b6..83d1926c61e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_map { struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; + atomic_t usercnt; }; struct bpf_map_type_list { @@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); +void bpf_map_inc(struct bpf_map *map, bool uref); +void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From ca369d51b3e1649be4a72addd6d6a168cfb3f537 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 13 Nov 2015 16:46:48 -0500 Subject: block/sd: Fix device-imposed transfer length limits Commit 4f258a46346c ("sd: Fix maximum I/O size for BLOCK_PC requests") had the unfortunate side-effect of removing an implicit clamp to BLK_DEF_MAX_SECTORS for REQ_TYPE_FS requests in the block layer code. This caused problems for some SMR drives. Debugging this issue revealed a few problems with the existing infrastructure since the block layer didn't know how to deal with device-imposed limits, only limits set by the I/O controller. - Introduce a new queue limit, max_dev_sectors, which is used by the ULD to signal the maximum sectors for a REQ_TYPE_FS request. - Ensure that max_dev_sectors is correctly stacked and taken into account when overriding max_sectors through sysfs. - Rework sd_read_block_limits() so it saves the max_xfer and opt_xfer values for later processing. - In sd_revalidate() set the queue's max_dev_sectors based on the MAXIMUM TRANSFER LENGTH value in the Block Limits VPD. If this value is not reported, fall back to a cap based on the CDB TRANSFER LENGTH field size. - In sd_revalidate(), use OPTIMAL TRANSFER LENGTH from the Block Limits VPD--if reported and sane--to signal the preferred device transfer size for FS requests. 
Otherwise use BLK_DEF_MAX_SECTORS. - blk_limits_max_hw_sectors() is no longer used and can be removed. Signed-off-by: Martin K. Petersen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=93581 Reviewed-by: Christoph Hellwig Tested-by: sweeneygj@gmx.com Tested-by: Arzeets Tested-by: David Eisner Tested-by: Mario Kicherer Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 38a5ff772a37..9dacb745fa96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -253,6 +253,7 @@ struct queue_limits { unsigned long virt_boundary_mask; unsigned int max_hw_sectors; + unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; unsigned int max_segment_size; @@ -948,7 +949,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 08236c6bb2980561fba657c58fdc76f2865f236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sat, 28 Nov 2015 16:49:27 +0100 Subject: lightnvm: unconverted ppa returned in get_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_bb_tbl function takes ppa as a generic address, which is converted to the ppa device address within the device driver. When the update_bbtbl callback is called from get_bb_tbl, the device specific ppa is used, instead of the generic ppa. Make sure to pass the generic ppa. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3db5552b17d5..c6916aec43b6 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -179,7 +179,7 @@ typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -- cgit v1.2.3 From bf4e6b4e757488dee1b6a581f49c7ac34cd217f8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 26 Nov 2015 08:46:57 +0100 Subject: block: Always check queue limits for cloned requests When a cloned request is retried on other queues it always needs to be checked against the queue limits of that queue. Otherwise the calculations for nr_phys_segments might be wrong, leading to a crash in scsi_init_sgtable(). 
To clarify this the patch renames blk_rq_check_limits() to blk_cloned_rq_check_limits() and removes the symbol export, as the new function should only be used for cloned requests and never exported. Cc: Mike Snitzer Cc: Ewan Milne Cc: Jeff Moyer Signed-off-by: Hannes Reinecke Fixes: e2a60da74 ("block: Clean up special command handling logic") Cc: stable@vger.kernel.org # 3.7+ Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0d2b7927c1f..c06f8eaa42ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -773,7 +773,6 @@ extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); -extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, -- cgit v1.2.3 From 880621c2605b82eb5af91a2c94223df6f5a3fb64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Nov 2015 17:46:09 +0100 Subject: packet: Allow packets with only a header (but no payload) Commit 9c7077622dd91 ("packet: make packet_snd fail on len smaller than l2 header") added validation for the packet size in packet_snd. This change enforces that every packet needs a header (with at least hard_header_len bytes) plus a payload with at least one byte. Before this change the payload was optional. This fixes PPPoE connections which do not have a "Service" or "Host-Uniq" configured (which is violating the spec, but is still widely used in real-world setups). Those are currently failing with the following message: "pppd: packet size is too short (24 <= 24)" Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..3b5d134e945a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,7 +1398,8 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length + * @hard_header_len: Hardware header length, which means that this is the + * minimum size of a packet. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed -- cgit v1.2.3 From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:10 -0800 Subject: net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA This patch is a cleanup to make following patch easier to review. Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA from (struct socket)->flags to a (struct socket_wq)->flags to benefit from RCU protection in sock_wake_async() To ease backports, we rename both constants. Two new helpers, sk_set_bit(int nr, struct sock *sk) and sk_clear_bit(int net, struct sock *sk) are added so that following patch can change their implementation. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/net.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..f514e4dd5521 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,8 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -96,7 +96,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation -- cgit v1.2.3 From ceb5d58b217098a657f3850b7a2640f995032e62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:11 -0800 Subject: net: fix sock_wake_async() rcu protection Dmitry provided a syzkaller (http://github.com/google/syzkaller) triggering a fault in sock_wake_async() when async IO is requested. Said program stressed af_unix sockets, but the issue is generic and should be addressed in core networking stack. The problem is that by the time sock_wake_async() is called, we should not access the @flags field of 'struct socket', as the inode containing this socket might be freed without further notice, and without RCU grace period. We already maintain an RCU protected structure, "struct socket_wq" so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it is the safe route. It also reduces number of cache lines needing dirtying, so might provide a performance improvement anyway. In followup patches, we might move remaining flags (SOCK_NOSPACE, SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and let 'struct socket' being mostly read and let it being shared between cpus. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index f514e4dd5521..0b4ac7da583a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,6 +34,10 @@ struct inode; struct file; struct net; +/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located + * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. + * Eventually all flags will be in sk->sk_wq_flags. 
+ */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 @@ -89,6 +93,7 @@ struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; + unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; @@ -202,7 +207,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, -- cgit v1.2.3 From 64031e3e8a5c042840c5123af695eec89f9e6a24 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 2 Dec 2015 15:44:22 +0800 Subject: ACPI / property: fix compile error for acpi_node_get_property_reference() when CONFIG_ACPI=n In commit 60ba032ed76e ("ACPI / property: Drop size_prop from acpi_dev_get_property_reference()"), the argument "const char *cells_name" was dropped, but forgot to update the stub function in no-ACPI case, it will lead to compile error when CONFIG_ACPI=n, easliy remove "const char *cells_name" to fix it. Fixes: 60ba032ed76e "ACPI / property: Drop size_prop from acpi_dev_get_property_reference()" Reported-by: Kejian Yan Signed-off-by: Hanjun Guo Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 865d948c60e6..9e6f4bb4692f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -782,8 +782,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, } static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, const char *cells_name, - size_t index, struct acpi_reference_args *args) + const char *name, size_t index, + struct acpi_reference_args *args) { return -ENXIO; } -- cgit v1.2.3 From 69030dd1c3671625c6f766af0b64a4bb4409ac3b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 1 Dec 2015 16:52:14 -0800 Subject: cpufreq: use last policy after online for drivers with ->setpolicy For cpufreq drivers which use setpolicy interface, after offline->online the policy is set to default. This can be reproduced by setting the default policy of intel_pstate or longrun to ondemand and then change to "performance". After offline and online, the setpolicy will be called with the policy=ondemand. For drivers using governors this condition is handled by storing last_governor, during offline and restoring during online. The same should be done for drivers using setpolicy interface. Storing last_policy during offline and restoring during online. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ef4c5b1a860f..177c7680c1a8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -77,6 +77,7 @@ struct cpufreq_policy { unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ + unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ -- cgit v1.2.3 From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: ipv6: add complete rcu protection around np->opt This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0ef2a97ccdb5..402753bccafa 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; -- cgit v1.2.3 From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Dec 2015 10:18:21 -0500 Subject: cgroup: fix handling of multi-destination migration from subtree_control enabling Consider the following v2 hierarchy. P0 (+memory) --- P1 (-memory) --- A \- B P0 has memory enabled in its subtree_control while P1 doesn't. If both A and B contain processes, they would belong to the memory css of P1. Now if memory is enabled on P1's subtree_control, memory csses should be created on both A and B and A's processes should be moved to the former and B's processes the latter. IOW, enabling controllers can cause atomic migrations into different csses. The core cgroup migration logic has been updated accordingly but the controller migration methods haven't and still assume that all tasks migrate to a single target css; furthermore, the methods were fed the css in which subtree_control was updated which is the parent of the target csses. pids controller depends on the migration methods to move charges and this made the controller attribute charges to the wrong csses often triggering the following warning by driving a counter negative. WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40() Modules linked in: CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29 ... 
ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000 ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00 ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8 Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] pids_cancel.constprop.6+0x31/0x40 [] pids_can_attach+0x6d/0xf0 [] cgroup_taskset_migrate+0x6c/0x330 [] cgroup_migrate+0xf5/0x190 [] cgroup_attach_task+0x176/0x200 [] __cgroup_procs_write+0x2ad/0x460 [] cgroup_procs_write+0x14/0x20 [] cgroup_file_write+0x35/0x1c0 [] kernfs_fop_write+0x141/0x190 [] __vfs_write+0x28/0xe0 [] vfs_write+0xac/0x1a0 [] SyS_write+0x49/0xb0 [] entry_SYSCALL_64_fastpath+0x12/0x76 This patch fixes the bug by removing @css parameter from the three migration methods, ->can_attach, ->cancel_attach() and ->attach() and updating cgroup_taskset iteration helpers also return the destination css in addition to the task being migrated. All controllers are updated accordingly. * Controllers which don't care whether there are one or multiple target csses can be converted trivially. cpu, io, freezer, perf, netclassid and netprio fall in this category. * cpuset's current implementation assumes that there's single source and destination and thus doesn't support v2 hierarchy already. The only change made by this patchset is how that single destination css is obtained. * memory migration path already doesn't do anything on v2. How the single destination css is obtained is updated and the prep stage of mem_cgroup_can_attach() is reordered to accomodate the change. * pids is the only controller which was affected by this bug. It now correctly handles multi-destination migrations and no longer causes counter underflow from incorrect accounting. 
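As an illustration of the updated interface, a controller's ->attach() callback would now walk the taskset roughly as sketched below. This is not taken from the patch; example_attach(), example_css() and example_charge() are made-up names standing in for a controller's own helpers.

static void example_attach(struct cgroup_taskset *tset)
{
	struct cgroup_subsys_state *dst_css;
	struct task_struct *task;

	/* each task is now paired with the css it is actually migrating to */
	cgroup_taskset_for_each(task, dst_css, tset)
		example_charge(example_css(dst_css), task);
}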
Signed-off-by: Tejun Heo Reported-and-tested-by: Daniel Wagner Cc: Aleksa Sarai --- include/linux/cgroup-defs.h | 9 +++------ include/linux/cgroup.h | 33 ++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 869fd4a3d28e..06b77f9dd3f2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -422,12 +422,9 @@ struct cgroup_subsys { void (*css_reset)(struct cgroup_subsys_state *css); void (*css_e_css_changed)(struct cgroup_subsys_state *css); - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); + int (*can_attach)(struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_taskset *tset); + void (*attach)(struct cgroup_taskset *tset); int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f64083030ad5..cb91b44f5f78 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); -struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); -struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); +struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, struct css_task_iter *it); @@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor + * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple - * processes. When there are multiple tasks in @tset, if a task of a - * process is in @tset, all tasks of the process are in @tset. Also, all - * are guaranteed to share the same source and destination csses. + * processes. + * + * On the v2 hierarchy, there may be tasks from multiple processes and they + * may not share the source or destination csses. + * + * On traditional hierarchies, when there are multiple tasks in @tset, if a + * task of a process is in @tset, all tasks of the process are in @tset. + * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ -#define cgroup_taskset_for_each(task, tset) \ - for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) +#define cgroup_taskset_for_each(task, dst_css, tset) \ + for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ + (task); \ + (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor + * @dst_css: the destination css * @tset: takset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. 
*/ -#define cgroup_taskset_for_each_leader(leader, tset) \ - for ((leader) = cgroup_taskset_first((tset)); (leader); \ - (leader) = cgroup_taskset_next((tset))) \ +#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ + for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ + (leader); \ + (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else -- cgit v1.2.3 From ae5515d66362b9d96cdcfce504567f0b8b7bd83e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 4 Dec 2015 08:38:42 -0700 Subject: Revert: "vfio: Include No-IOMMU mode" Revert commit 033291eccbdb ("vfio: Include No-IOMMU mode") due to lack of a user. This was originally intended to fill a need for the DPDK driver, but uptake has been slow so rather than support an unproven kernel interface revert it and revisit when userspace catches up. Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..ddb440975382 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,9 +44,6 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; -extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); - extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); -- cgit v1.2.3 From ea013a9b205b47b1fcbc72522146fad560af0712 Mon Sep 17 00:00:00 2001 From: Andreas Werner Date: Fri, 4 Dec 2015 18:12:49 +0100 Subject: libata-eh.c: Introduce new ata port flag for controller which lockup on read log page Some controller lockup on a ata_read_log_page. Add new ata port flag ATA_FLAG_NO_LOG_PAGE which can used to blacklist a controller. If this flag is set, any attempt to read a log page returns an error without actually issuing the command. Signed-off-by: Andreas Werner Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 83577f8fd15b..600c1e0626a5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -210,6 +210,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */ -- cgit v1.2.3 From 57b4bd06ff0372fe1e3617889c4b37fbd500364a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:47 +0100 Subject: lightnvm: comments on constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not obvious what NVM_IO_* and NVM_BLK_T_* are used for. Make sure to comment them appropriately as the other constants. 
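For illustration only, a bad-block-table callback (nvm_bb_update_fn) might consume the block-type constants as in the sketch below; example_mark_unusable() is a made-up helper and the snippet is not part of the patch.

static int example_update_bbtbl(struct ppa_addr ppa, int nr_blocks, u8 *blks,
				void *private)
{
	int i;

	for (i = 0; i < nr_blocks; i++) {
		/* anything other than NVM_BLK_T_FREE is unavailable to the target */
		if (blks[i] != NVM_BLK_T_FREE)
			example_mark_unusable(private, ppa, i);
	}
	return 0;
}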
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index c6916aec43b6..935ef3844c05 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -50,9 +50,16 @@ enum { NVM_IO_DUAL_ACCESS = 0x1, NVM_IO_QUAD_ACCESS = 0x2, + /* NAND Access Modes */ NVM_IO_SUSPEND = 0x80, NVM_IO_SLC_MODE = 0x100, NVM_IO_SCRAMBLE_DISABLE = 0x200, + + /* Block Types */ + NVM_BLK_T_FREE = 0x0, + NVM_BLK_T_BAD = 0x1, + NVM_BLK_T_DEV = 0x2, + NVM_BLK_T_HOST = 0x4, }; struct nvm_id_group { -- cgit v1.2.3 From 16f26c3aa9b9c36a9d1092ae3258461d1008481e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:48 +0100 Subject: lightnvm: replace req queue with nvmdev for lld MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where a request queue is passed to the low lever lightnvm device drive integration, the device driver might pass its admin commands through another queue. Instead pass nvm_dev, and let the low level drive the appropriate queue. Reported-by: Christoph Hellwig Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 935ef3844c05..034117b3be5f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -183,17 +183,17 @@ struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); -typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); -typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, +typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); -typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); -typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); -typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t, +typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); -- cgit v1.2.3 From a5e14ba334e202c58e45ef47414ec94c585c1a8c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 28 Oct 2015 13:28:15 +0200 Subject: mlx4: Expose correct max_sge_rd limit mlx4 devices (ConnectX-2, ConnectX-3) has a limitation where rdma read work queue entries cannot exceed 512 bytes. A rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. 
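To show how a consumer would honour the corrected limit, here is a rough sketch that caps the scatter list used for RDMA READ work requests. It assumes the ib_query_device()/ib_device_attr interface of this kernel generation and is not part of the patch; example_cap_read_sge() is a made-up name.

#include <rdma/ib_verbs.h>

static int example_cap_read_sge(struct ib_device *device, int wanted_sge)
{
	struct ib_device_attr attr;
	int err;

	err = ib_query_device(device, &attr);
	if (err)
		return err;

	/* mlx4 now reports 30 here instead of the larger send-queue SGE limit */
	return min_t(int, wanted_sge, attr.max_sge_rd);
}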
Signed-off-by: Sagi Grimberg Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7501626ab529..d3133be12d92 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -426,6 +426,17 @@ enum { MLX4_MAX_FAST_REG_PAGES = 511, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX4_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, -- cgit v1.2.3 From 4c3141e09cfa6460bfcd5e90f73e498db654c917 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 1 Dec 2015 17:24:17 +0100 Subject: of/irq: Export of_irq_find_parent again of_irq_find_parent was made static since it had no users outside of of_irq.c. Export it again since we are going to use it again. Signed-off-by: Carlo Caione [robh: move of_irq_find_parent to correct ifdef section] Signed-off-by: Rob Herring --- include/linux/of_irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 039f2eec49ce..f648acf27ed7 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); @@ -70,6 +71,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline void *of_irq_find_parent(struct device_node *child) +{ + return NULL; +} + static inline struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token) -- cgit v1.2.3 From eaddb5725357e9f05ffe5d271630f8197d089da4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 9 Dec 2015 09:11:10 -0600 Subject: of/irq: move of_msi_map_rid declaration to the correct ifdef section In checking fixes for of_irq_find_parent declaration location, I found that of_msi_map_rid is also wrong. of_msi_map_rid is not implemented for Sparc, so it should not be in the Sparc specific section of the header. Move it to just depend on OF_IRQ. 
Cc: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of_irq.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index f648acf27ed7..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -53,6 +53,7 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev, extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid); extern void of_msi_configure(struct device *dev, struct device_node *np); +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else static inline int of_irq_count(struct device_node *dev) { @@ -90,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } +static inline u32 of_msi_map_rid(struct device *dev, + struct device_node *msi_np, u32 rid_in) +{ + return rid_in; +} #endif #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) @@ -99,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) * so declare it here regardless of the CONFIG_OF_IRQ setting. */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else /* !CONFIG_OF && !CONFIG_SPARC */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -107,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, { return 0; } - -static inline u32 of_msi_map_rid(struct device *dev, - struct device_node *msi_np, u32 rid_in) -{ - return rid_in; -} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From d7e35dfa2531b53618b9e6edcd8752ce988ac555 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 3 Dec 2015 22:04:01 -0500 Subject: bitops.h: correctly handle rol32 with 0 byte shift ROL on a 32 bit integer with a shift of 32 or more is undefined and the result is arch-dependent. Avoid this by handling the trivial case of roling by 0 correctly. The trivial solution of checking if shift is 0 breaks gcc's detection of this code as a ROL instruction, which is unacceptable. This bug was reported and fixed in GCC (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57157): The standard rotate idiom, (x << n) | (x >> (32 - n)) is recognized by gcc (for concreteness, I discuss only the case that x is an uint32_t here). However, this is portable C only for n in the range 0 < n < 32. For n == 0, we get x >> 32 which gives undefined behaviour according to the C standard (6.5.7, Bitwise shift operators). To portably support n == 0, one has to write the rotate as something like (x << n) | (x >> ((-n) & 31)) And this is apparently not recognized by gcc. Note that this is broken on older GCCs and will result in slower ROL. 
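A short illustration of why the zero-shift case matters (not part of the patch):

#include <linux/bitops.h>

/*
 * With the fixed idiom a zero shift is well defined, since (-0) & 31 == 0:
 *
 *	rol32(x, 0) == (x << 0) | (x >> 0) == x
 *
 * whereas the old form evaluated x >> 32, which the C standard leaves
 * undefined for a 32-bit type.
 */
static __u32 rol32_zero_shift_demo(__u32 x)
{
	return rol32(x, 0);	/* now simply x on every compiler */
}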
Acked-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2b8ed123ad36..defeaac0745f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift) */ static inline __u32 rol32(__u32 word, unsigned int shift) { - return (word << shift) | (word >> (32 - shift)); + return (word << shift) | (word >> ((-shift) & 31)); } /** -- cgit v1.2.3 From 059393c5bdd1420bdf1bed2972f33196dff263ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 7 Dec 2015 10:11:11 +0000 Subject: irqchip/gic-v3: Add missing struct device_node declaration When the GICv3 header file is used in a C file that doesn't include any of the OF stuff, we end up with a bunch of ugly warnings. Let's keep GCC quiet by adding a forward declaration. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Link: http://lkml.kernel.org/r/1449483072-17694-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c9ae0c6ec050..d5d798b35c1f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -330,6 +330,7 @@ struct rdists { }; struct irq_domain; +struct device_node; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, struct irq_domain *domain); -- cgit v1.2.3 From ad87e03213b552a5c33d5e1e7a19a73768397010 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 10 Dec 2015 15:27:21 -0500 Subject: USB: add quirk for devices with broken LPM Some USB device / host controller combinations seem to have problems with Link Power Management. For example, Steinar found that his xHCI controller wouldn't handle bandwidth calculations correctly for two video cards simultaneously when LPM was enabled, even though the bus had plenty of bandwidth available. This patch introduces a new quirk flag for devices that should remain disabled for LPM, and creates quirk entries for Steinar's devices. Signed-off-by: Alan Stern Reported-by: Steinar H. Gunderson Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 9948c874e3f1..1d0043dc34e4 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -47,4 +47,7 @@ /* device generates spurious wakeup, ignore remote wakeup capability */ #define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9) +/* device can't handle Link Power Management */ +#define USB_QUIRK_NO_LPM BIT(10) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 98e89cf02aed11166698dd53c6f14865613babb3 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 11 Dec 2015 13:40:43 -0800 Subject: mm: kmemleak: mark kmemleak_init prototype as __init The kmemleak_init() definition in mm/kmemleak.c is marked __init but its prototype in include/linux/kmemleak.h is marked __ref since commit a6186d89c913 ("kmemleak: Mark the early log buffer as __initdata"). This causes a section mismatch which is reported as a warning when building with clang -Wsection, because kmemleak_init() is declared in section .ref.text but defined in .init.text. 
Fix this by marking kmemleak_init() prototype __init. Signed-off-by: Nicolas Iooss Signed-off-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index d0a1f99e24e3..4894c6888bc6 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -25,7 +25,7 @@ #ifdef CONFIG_DEBUG_KMEMLEAK -extern void kmemleak_init(void) __ref; +extern void kmemleak_init(void) __init; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, -- cgit v1.2.3 From 86fffe4a61dd972d5a4e23260d530be6da02f614 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Dec 2015 13:40:46 -0800 Subject: kernel: remove stop_machine() Kconfig dependency Currently the full stop_machine() routine is only enabled on SMP if module unloading is enabled, or if the CPUs are hotpluggable. This leads to configurations where stop_machine() is broken as it will then only run the callback on the local CPU with irqs disabled, and not stop the other CPUs or run the callback on them. For example, this breaks MTRR setup on x86 in certain configs since ea8596bb2d8d379 ("kprobes/x86: Remove unused text_poke_smp() and text_poke_smp_batch() functions") as the MTRR is only established on the boot CPU. This patch removes the Kconfig option for STOP_MACHINE and uses the SMP and HOTPLUG_CPU config options to compile the correct stop_machine() for the architecture, removing the false dependency on MODULE_UNLOAD in the process. Link: https://lkml.org/lkml/2014/10/8/124 References: https://bugs.freedesktop.org/show_bug.cgi?id=84794 Signed-off-by: Chris Wilson Acked-by: Ingo Molnar Cc: "Paul E. McKenney" Cc: Pranith Kumar Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: H. Peter Anvin Cc: Tejun Heo Cc: Iulia Manda Cc: Andy Lutomirski Cc: Rusty Russell Cc: Peter Zijlstra Cc: Chuck Ebbert Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0adedca24c5b..0e1b1540597a 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * grabbing every spinlock (and more). So the "read" side to such a * lock is anything which disables preemption. 
*/ -#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) +#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) /** * stop_machine: freeze the machine on all CPUs and run this function @@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) @@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, return stop_machine(fn, data, cpus); } -#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 13 Dec 2015 22:11:16 +0100 Subject: sched/wait: Fix the signal handling fix Jan Stancek reported that I wrecked things for him by fixing things for Vladimir :/ His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which should not be possible, however my previous patch made this possible by unconditionally checking signal_pending(). We cannot use current->state as was done previously, because the instruction after the store to that variable it can be changed. We must instead pass the initial state along and use that. Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers") Reported-by: Jan Stancek Reported-by: Chris Mason Tested-by: Jan Stancek Tested-by: Vladimir Murzin Tested-by: Chris Mason Reviewed-by: Paul Turner Cc: Ingo Molnar Cc: tglx@linutronix.de Cc: Oleg Nesterov Cc: hpa@zytor.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- include/linux/wait.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..513b36f04dfd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) list_del(&old->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *); +typedef int wait_bit_action_f(struct wait_bit_key *, int mode); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); @@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); } while (0) -extern int bit_wait(struct wait_bit_key *); -extern int bit_wait_io(struct wait_bit_key *); -extern int bit_wait_timeout(struct wait_bit_key *); -extern int bit_wait_io_timeout(struct wait_bit_key *); +extern int bit_wait(struct wait_bit_key *, int); +extern int bit_wait_io(struct wait_bit_key *, int); +extern int bit_wait_timeout(struct wait_bit_key *, int); +extern int bit_wait_io_timeout(struct wait_bit_key *, int); /** * wait_on_bit - wait for a bit to be cleared -- cgit v1.2.3
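For context, an action callback under the new wait_bit_action_f signature would use the extra argument roughly as sketched below. The corresponding kernel/sched/wait.c changes are outside this include/linux digest, so treat this as an assumption-laden sketch rather than the actual implementation; example_bit_wait() is a made-up name.

#include <linux/sched.h>
#include <linux/wait.h>

static int example_bit_wait(struct wait_bit_key *key, int mode)
{
	/* fail the wait only when the caller actually sleeps interruptibly */
	if (signal_pending_state(mode, current))
		return -EINTR;
	schedule();
	return 0;
}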