From efd68e7254503f3207805f674a1ea1d743f5dfe2 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 3 Jun 2012 22:04:33 -0700
Subject: devicetree: add helper inline for retrieving a node's full name

The pattern (np ? np->full_name : "<none>") is rather common in the
kernel, but can also make for quite long lines.  This patch adds a new
inline function, of_node_full_name() so that the test for a valid node
pointer doesn't need to be open coded at all call sites.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/of.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')
diff --git a/include/linux/of.h b/include/linux/of.h
index 2ec1083af7ff..1012377cae92 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -163,6 +163,11 @@ static inline int of_node_to_nid(struct device_node *np) { return -1; }
 #define of_node_to_nid of_node_to_nid
 #endif
 
+static inline const char* of_node_full_name(struct device_node *np)
+{
+	return np ? np->full_name : "<no-node>";
+}
+
 extern struct device_node *of_find_node_by_name(struct device_node *from,
 	const char *name);
 #define for_each_node_by_name(dn, name) \
@@ -303,6 +308,11 @@ const char *of_prop_next_string(struct property *prop, const char *cur);
 
 #else /* CONFIG_OF */
 
+static inline const char* of_node_full_name(struct device_node *np)
+{
+	return "<no-node>";
+}
+
 static inline bool of_have_populated_dt(void)
 {
 	return false;
-- 
cgit v1.2.3


From 5ca4db61e859526b2dbee3bcea3626d3de49a0b2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 Jun 2012 22:04:34 -0700
Subject: irqdomain: Simple NUMA awareness.

While common irqdesc allocation is node aware, the irqdomain code is not.

Presently we observe a number of regressions/inconsistencies on
NUMA-capable platforms:

- Platforms using irqdomains with legacy mappings, where the
  irq_descs are allocated node-local and the irqdomain data
  structure is not.

- Drivers implementing irqdomains will lose node locality
  regardless of the underlying struct device's node id.

This plugs in NUMA node id proliferation across the various allocation
callsites by way of_node_to_nid() node lookup. While of_node_to_nid()
does the right thing for OF-capable platforms it doesn't presently handle
the non-DT case. This is trivially dealt with by simply wraping in to
numa_node_id() unconditionally.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 1012377cae92..76930ee78db5 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -21,6 +21,7 @@
 #include <linux/kref.h>
 #include <linux/mod_devicetable.h>
 #include <linux/spinlock.h>
+#include <linux/topology.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -158,11 +159,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 #define OF_BAD_ADDR	((u64)-1)
 
-#ifndef of_node_to_nid
-static inline int of_node_to_nid(struct device_node *np) { return -1; }
-#define of_node_to_nid of_node_to_nid
-#endif
-
 static inline const char* of_node_full_name(struct device_node *np)
 {
 	return np ? np->full_name : "<no-node>";
@@ -412,6 +408,15 @@ static inline int of_machine_is_compatible(const char *compat)
 	while (0)
 #endif /* CONFIG_OF */
 
+#ifndef of_node_to_nid
+static inline int of_node_to_nid(struct device_node *np)
+{
+	return numa_node_id();
+}
+
+#define of_node_to_nid of_node_to_nid
+#endif
+
 /**
  * of_property_read_bool - Findfrom a property
  * @np:		device node from which the property value is to be read.
-- 
cgit v1.2.3


From a91a5ac6858fbf7477131e1210cb3e897b668e6f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 4 Jun 2012 20:40:53 -0700
Subject: mempool: add @gfp_mask to mempool_create_node()

mempool_create_node() currently assumes %GFP_KERNEL.  Its only user,
blk_init_free_list(), is about to be updated to use other allocation
flags - add @gfp_mask argument to the function.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/mempool.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 7c08052e3321..39ed62ab5b8a 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -26,7 +26,8 @@ typedef struct mempool_s {
 extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
 			mempool_free_t *free_fn, void *pool_data);
 extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data, int nid);
+			mempool_free_t *free_fn, void *pool_data,
+			gfp_t gfp_mask, int nid);
 
 extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask);
 extern void mempool_destroy(mempool_t *pool);
-- 
cgit v1.2.3


From 86072d8112595ea1b6beeb33f578e7c2839e014e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 4 Jun 2012 20:40:54 -0700
Subject: block: drop custom queue draining used by scsi_transport_{iscsi|fc}

iscsi_remove_host() uses bsg_remove_queue() which implements custom
queue draining.  fc_bsg_remove() open-codes mostly identical logic.

The draining logic isn't correct in that blk_stop_queue() doesn't
prevent new requests from being queued - it just stops processing, so
nothing prevents new requests to be queued after the logic determines
that the queue is drained.

blk_cleanup_queue() now implements proper queue draining and these
custom draining logics aren't necessary.  Drop them and use
bsg_unregister_queue() + blk_cleanup_queue() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: James Smart <james.smart@emulex.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bsg-lib.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index f55ab8cdc106..4d0fb3df2f4a 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -67,7 +67,6 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_remove_queue(struct request_queue *q);
 void bsg_goose_queue(struct request_queue *q);
 
 #endif
-- 
cgit v1.2.3


From 8a5ecdd42862bf87ceab00bf2a15d7eabf58c02d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 4 Jun 2012 20:40:58 -0700
Subject: block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv

Add q->nr_rqs[] which currently behaves the same as q->rq.count[] and
move q->rq.elvpriv to q->nr_rqs_elvpriv.  blk_drain_queue() is updated
to use q->nr_rqs[] instead of q->rq.count[].

These counters separates queue-wide request statistics from the
request list and allow implementation of per-queue request allocation.

While at it, properly indent fields of struct request_list.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 07954b05b86c..7e44ed93f84b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -51,11 +51,10 @@ struct request_list {
 	 * count[], starved[], and wait[] are indexed by
 	 * BLK_RW_SYNC/BLK_RW_ASYNC
 	 */
-	int count[2];
-	int starved[2];
-	int elvpriv;
-	mempool_t *rq_pool;
-	wait_queue_head_t wait[2];
+	int			count[2];
+	int			starved[2];
+	mempool_t		*rq_pool;
+	wait_queue_head_t	wait[2];
 };
 
 /*
@@ -282,6 +281,8 @@ struct request_queue {
 	struct list_head	queue_head;
 	struct request		*last_merge;
 	struct elevator_queue	*elevator;
+	int			nr_rqs[2];	/* # allocated [a]sync rqs */
+	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
 	/*
 	 * the queue request freelist, one for reads and one for writes
-- 
cgit v1.2.3


From 5b788ce3e2acac9bf109743b1281d77347cf2101 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 4 Jun 2012 20:40:59 -0700
Subject: block: prepare for multiple request_lists

Request allocation is about to be made per-blkg meaning that there'll
be multiple request lists.

* Make queue full state per request_list.  blk_*queue_full() functions
  are renamed to blk_*rl_full() and takes @rl instead of @q.

* Rename blk_init_free_list() to blk_init_rl() and make it take @rl
  instead of @q.  Also add @gfp_mask parameter.

* Add blk_exit_rl() instead of destroying rl directly from
  blk_release_queue().

* Add request_list->q and make request alloc/free functions -
  blk_free_request(), [__]freed_request(), __get_request() - take @rl
  instead of @q.

This patch doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7e44ed93f84b..f2385ee7c7b2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -46,7 +46,12 @@ struct blkcg_gq;
 struct request;
 typedef void (rq_end_io_fn)(struct request *, int);
 
+#define BLK_RL_SYNCFULL		(1U << 0)
+#define BLK_RL_ASYNCFULL	(1U << 1)
+
 struct request_list {
+	struct request_queue	*q;	/* the queue this rl belongs to */
+
 	/*
 	 * count[], starved[], and wait[] are indexed by
 	 * BLK_RW_SYNC/BLK_RW_ASYNC
@@ -55,6 +60,7 @@ struct request_list {
 	int			starved[2];
 	mempool_t		*rq_pool;
 	wait_queue_head_t	wait[2];
+	unsigned int		flags;
 };
 
 /*
@@ -562,27 +568,25 @@ static inline bool rq_is_sync(struct request *rq)
 	return rw_is_sync(rq->cmd_flags);
 }
 
-static inline int blk_queue_full(struct request_queue *q, int sync)
+static inline bool blk_rl_full(struct request_list *rl, bool sync)
 {
-	if (sync)
-		return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
-	return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
+	unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
+
+	return rl->flags & flag;
 }
 
-static inline void blk_set_queue_full(struct request_queue *q, int sync)
+static inline void blk_set_rl_full(struct request_list *rl, bool sync)
 {
-	if (sync)
-		queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
-	else
-		queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
+	unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
+
+	rl->flags |= flag;
 }
 
-static inline void blk_clear_queue_full(struct request_queue *q, int sync)
+static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
 {
-	if (sync)
-		queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
+	unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
+
+	rl->flags &= ~flag;
 }
 
 
-- 
cgit v1.2.3


From a051661ca6d134c18599498b185b667859d4339b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 26 Jun 2012 15:05:44 -0700
Subject: blkcg: implement per-blkg request allocation

Currently, request_queue has one request_list to allocate requests
from regardless of blkcg of the IO being issued.  When the unified
request pool is used up, cfq proportional IO limits become meaningless
- whoever grabs the next request being freed wins the race regardless
of the configured weights.

This can be easily demonstrated by creating a blkio cgroup w/ very low
weight, put a program which can issue a lot of random direct IOs there
and running a sequential IO from a different cgroup.  As soon as the
request pool is used up, the sequential IO bandwidth crashes.

This patch implements per-blkg request_list.  Each blkg has its own
request_list and any IO allocates its request from the matching blkg
making blkcgs completely isolated in terms of request allocation.

* Root blkcg uses the request_list embedded in each request_queue,
  which was renamed to @q->root_rl from @q->rq.  While making blkcg rl
  handling a bit harier, this enables avoiding most overhead for root
  blkcg.

* Queue fullness is properly per request_list but bdi isn't blkcg
  aware yet, so congestion state currently just follows the root
  blkcg.  As writeback isn't aware of blkcg yet, this works okay for
  async congestion but readahead may get the wrong signals.  It's
  better than blkcg completely collapsing with shared request_list but
  needs to be improved with future changes.

* After this change, each block cgroup gets a full request pool making
  resource consumption of each cgroup higher.  This makes allowing
  non-root users to create cgroups less desirable; however, note that
  allowing non-root users to directly manage cgroups is already
  severely broken regardless of this patch - each block cgroup
  consumes kernel memory and skews IO weight (IO weights are not
  hierarchical).

v2: queue-sysfs.txt updated and patch description udpated as suggested
    by Vivek.

v3: blk_get_rl() wasn't checking error return from
    blkg_lookup_create() and may cause oops on lookup failure.  Fix it
    by falling back to root_rl on blkg lookup failures.  This problem
    was spotted by Rakesh Iyer <rni@google.com>.

v4: Updated to accomodate 458f27a982 "block: Avoid missed wakeup in
    request waitqueue".  blk_drain_queue() now wakes up waiters on all
    blkg->rl on the target queue.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f2385ee7c7b2..3816ce8a08fc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -51,7 +51,9 @@ typedef void (rq_end_io_fn)(struct request *, int);
 
 struct request_list {
 	struct request_queue	*q;	/* the queue this rl belongs to */
-
+#ifdef CONFIG_BLK_CGROUP
+	struct blkcg_gq		*blkg;	/* blkg this request pool belongs to */
+#endif
 	/*
 	 * count[], starved[], and wait[] are indexed by
 	 * BLK_RW_SYNC/BLK_RW_ASYNC
@@ -143,6 +145,7 @@ struct request {
 	struct hd_struct *part;
 	unsigned long start_time;
 #ifdef CONFIG_BLK_CGROUP
+	struct request_list *rl;		/* rl this rq is alloced from */
 	unsigned long long start_time_ns;
 	unsigned long long io_start_time_ns;    /* when passed to hardware */
 #endif
@@ -291,9 +294,12 @@ struct request_queue {
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
 	/*
-	 * the queue request freelist, one for reads and one for writes
+	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
+	 * is used, root blkg allocates from @q->root_rl and all other
+	 * blkgs from their own blkg->rl.  Which one to use should be
+	 * determined using bio_request_list().
 	 */
-	struct request_list	rq;
+	struct request_list	root_rl;
 
 	request_fn_proc		*request_fn;
 	make_request_fn		*make_request_fn;
-- 
cgit v1.2.3


From 48afb3112e6373a292e54d675e986a5da14c0516 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 26 May 2012 16:58:15 +0100
Subject: dmaengine: PL08x: remove circular_buffer boolean from channel data

Circular buffers are not handled in this way; we have a separate API
call now to setup circular buffers.  So lets not mislead people with
this bool.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/pl08x.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 02549017212a..0f5b34d668b6 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -51,9 +51,6 @@ enum {
  * can be the address of a FIFO register for burst requests for example.
  * This can be left undefined if the PrimeCell API is used for configuring
  * this.
- * @circular_buffer: whether the buffer passed in is circular and
- * shall simply be looped round round (like a record baby round
- * round round round)
  * @single: the device connected to this channel will request single DMA
  * transfers, not bursts. (Bursts are default.)
  * @periph_buses: the device connected to this channel is accessible via
@@ -66,7 +63,6 @@ struct pl08x_channel_data {
 	u32 muxval;
 	u32 cctl;
 	dma_addr_t addr;
-	bool circular_buffer;
 	bool single;
 	u8 periph_buses;
 };
-- 
cgit v1.2.3


From aeea1808fe752e917b966961bde3e9603f206dec Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 17 May 2012 15:01:28 +0100
Subject: dmaengine: PL08x: clean up get_signal/put_signal

Try to avoid dereferencing the DMA engine's channel struct in these
platform helpers; instead, pass a pointer to the channel data into
get_signal(), and the returned signal number to put_signal().

Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/pl08x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 0f5b34d668b6..88765a62c8f2 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -225,8 +225,8 @@ struct pl08x_platform_data {
 	const struct pl08x_channel_data *slave_channels;
 	unsigned int num_slave_channels;
 	struct pl08x_channel_data memcpy_channel;
-	int (*get_signal)(struct pl08x_dma_chan *);
-	void (*put_signal)(struct pl08x_dma_chan *);
+	int (*get_signal)(const struct pl08x_channel_data *);
+	void (*put_signal)(const struct pl08x_channel_data *, int);
 	u8 lli_buses;
 	u8 mem_buses;
 };
-- 
cgit v1.2.3


From b23f204c8dbbed8e501442c47d7639aac21a3d84 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 16 May 2012 10:48:44 +0100
Subject: dmaengine: PL08x: move private data structures into amba-pl08x.c

Move the driver private data structures into the driver itself, rather
than having them exposed to everyone in a header file.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/pl08x.h | 141 +--------------------------------------------
 1 file changed, 2 insertions(+), 139 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 88765a62c8f2..48d02bf66ec9 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -21,8 +21,9 @@
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 
-struct pl08x_lli;
 struct pl08x_driver_data;
+struct pl08x_phy_chan;
+struct pl08x_txd;
 
 /* Bitmasks for selecting AHB ports for DMA transfers */
 enum {
@@ -67,144 +68,6 @@ struct pl08x_channel_data {
 	u8 periph_buses;
 };
 
-/**
- * Struct pl08x_bus_data - information of source or destination
- * busses for a transfer
- * @addr: current address
- * @maxwidth: the maximum width of a transfer on this bus
- * @buswidth: the width of this bus in bytes: 1, 2 or 4
- */
-struct pl08x_bus_data {
-	dma_addr_t addr;
-	u8 maxwidth;
-	u8 buswidth;
-};
-
-/**
- * struct pl08x_phy_chan - holder for the physical channels
- * @id: physical index to this channel
- * @lock: a lock to use when altering an instance of this struct
- * @signal: the physical signal (aka channel) serving this physical channel
- * right now
- * @serving: the virtual channel currently being served by this physical
- * channel
- * @locked: channel unavailable for the system, e.g. dedicated to secure
- * world
- */
-struct pl08x_phy_chan {
-	unsigned int id;
-	void __iomem *base;
-	spinlock_t lock;
-	int signal;
-	struct pl08x_dma_chan *serving;
-	bool locked;
-};
-
-/**
- * struct pl08x_sg - structure containing data per sg
- * @src_addr: src address of sg
- * @dst_addr: dst address of sg
- * @len: transfer len in bytes
- * @node: node for txd's dsg_list
- */
-struct pl08x_sg {
-	dma_addr_t src_addr;
-	dma_addr_t dst_addr;
-	size_t len;
-	struct list_head node;
-};
-
-/**
- * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor
- * @tx: async tx descriptor
- * @node: node for txd list for channels
- * @dsg_list: list of children sg's
- * @direction: direction of transfer
- * @llis_bus: DMA memory address (physical) start for the LLIs
- * @llis_va: virtual memory address start for the LLIs
- * @cctl: control reg values for current txd
- * @ccfg: config reg values for current txd
- */
-struct pl08x_txd {
-	struct dma_async_tx_descriptor tx;
-	struct list_head node;
-	struct list_head dsg_list;
-	enum dma_transfer_direction direction;
-	dma_addr_t llis_bus;
-	struct pl08x_lli *llis_va;
-	/* Default cctl value for LLIs */
-	u32 cctl;
-	/*
-	 * Settings to be put into the physical channel when we
-	 * trigger this txd.  Other registers are in llis_va[0].
-	 */
-	u32 ccfg;
-};
-
-/**
- * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel
- * states
- * @PL08X_CHAN_IDLE: the channel is idle
- * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
- * channel and is running a transfer on it
- * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport
- * channel, but the transfer is currently paused
- * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport
- * channel to become available (only pertains to memcpy channels)
- */
-enum pl08x_dma_chan_state {
-	PL08X_CHAN_IDLE,
-	PL08X_CHAN_RUNNING,
-	PL08X_CHAN_PAUSED,
-	PL08X_CHAN_WAITING,
-};
-
-/**
- * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
- * @chan: wrappped abstract channel
- * @phychan: the physical channel utilized by this channel, if there is one
- * @phychan_hold: if non-zero, hold on to the physical channel even if we
- * have no pending entries
- * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
- * @name: name of channel
- * @cd: channel platform data
- * @runtime_addr: address for RX/TX according to the runtime config
- * @runtime_direction: current direction of this channel according to
- * runtime config
- * @pend_list: queued transactions pending on this channel
- * @at: active transaction on this channel
- * @lock: a lock for this channel data
- * @host: a pointer to the host (internal use)
- * @state: whether the channel is idle, paused, running etc
- * @slave: whether this channel is a device (slave) or for memcpy
- * @device_fc: Flow Controller Settings for ccfg register. Only valid for slave
- * channels. Fill with 'true' if peripheral should be flow controller. Direction
- * will be selected at Runtime.
- * @waiting: a TX descriptor on this channel which is waiting for a physical
- * channel to become available
- */
-struct pl08x_dma_chan {
-	struct dma_chan chan;
-	struct pl08x_phy_chan *phychan;
-	int phychan_hold;
-	struct tasklet_struct tasklet;
-	char *name;
-	const struct pl08x_channel_data *cd;
-	dma_addr_t src_addr;
-	dma_addr_t dst_addr;
-	u32 src_cctl;
-	u32 dst_cctl;
-	enum dma_transfer_direction runtime_direction;
-	struct list_head pend_list;
-	struct pl08x_txd *at;
-	spinlock_t lock;
-	struct pl08x_driver_data *host;
-	enum pl08x_dma_chan_state state;
-	bool slave;
-	bool device_fc;
-	struct pl08x_txd *waiting;
-};
-
 /**
  * struct pl08x_platform_data - the platform configuration for the PL08x
  * PrimeCells.
-- 
cgit v1.2.3


From 550ec36f507177470a394c4dfffcaf986ca25818 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 28 May 2012 10:18:55 +0100
Subject: dmaengine: PL08x: constify channel names and bus_id strings

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/pl08x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 48d02bf66ec9..158ce2634b01 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -58,7 +58,7 @@ enum {
  * these buses (use PL08X_AHB1 | PL08X_AHB2).
  */
 struct pl08x_channel_data {
-	char *bus_id;
+	const char *bus_id;
 	int min_signal;
 	int max_signal;
 	u32 muxval;
-- 
cgit v1.2.3


From dc8d5f8de12146c8732d926a30e5f064d76061e0 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 16 May 2012 12:20:55 +0100
Subject: dmaengine: PL08x: get rid of unnecessary checks in dma_slave_config

Get rid of the unnecessary checks in dma_slave_config utilizing
the DMA direction.  This allows us to move the computation of
cctl to the prepare function.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/pl08x.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 158ce2634b01..2a5f64a11b77 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -47,7 +47,8 @@ enum {
  * devices with static assignments
  * @muxval: a number usually used to poke into some mux regiser to
  * mux in the signal to this channel
- * @cctl_opt: default options for the channel control register
+ * @cctl_memcpy: options for the channel control register for memcpy
+ *  *** not used for slave channels ***
  * @addr: source/target address in physical memory for this DMA channel,
  * can be the address of a FIFO register for burst requests for example.
  * This can be left undefined if the PrimeCell API is used for configuring
@@ -62,7 +63,7 @@ struct pl08x_channel_data {
 	int min_signal;
 	int max_signal;
 	u32 muxval;
-	u32 cctl;
+	u32 cctl_memcpy;
 	dma_addr_t addr;
 	bool single;
 	u8 periph_buses;
-- 
cgit v1.2.3


From 781d0f46d81e2c26c70649903b503bcfe817efc8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 5 Jul 2012 12:19:19 +0100
Subject: irq_domain: Standardise legacy/linear domain selection

A large proportion of interrupt controllers that support legacy mappings
do so because non-DT systems need to use fixed IRQ numbers when registering
devices via buses but can otherwise use a linear mapping. The interrupt
controller itself typically is not affected by the mapping used and best
practice is to use a linear mapping where possible so drivers frequently
select at runtime depending on if a legacy range has been allocated to
them.

Standardise this behaviour by providing irq_domain_register_simple() which
will allocate a linear mapping unless a positive first_irq is provided in
which case it will fall back to a legacy mapping. This helps make best
practice for irq_domain adoption clearer.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/irqdomain.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 5abb533eb8eb..17b60be30fff 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -112,6 +112,11 @@ struct irq_domain {
 };
 
 #ifdef CONFIG_IRQ_DOMAIN
+struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
+					 unsigned int size,
+					 unsigned int first_irq,
+					 const struct irq_domain_ops *ops,
+					 void *host_data);
 struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
 					 unsigned int size,
 					 unsigned int first_irq,
-- 
cgit v1.2.3


From 98aa468e045a0091a7c34d9f5205a629634fabf4 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 17 Jun 2012 16:17:04 -0600
Subject: irqdomain: Support for static IRQ mapping and association.

This adds a new strict mapping API for supporting creation of linux IRQs
at existing positions within the domain. The new routines are as follows:

For dynamic allocation and insertion to specified ranges:

	- irq_create_identity_mapping()
	- irq_create_strict_mappings()

These will allocate and associate a range of linux IRQs at the specified
location. This can be used by controllers that have their own static linux IRQ
definitions to map a hwirq range to, as well as for platforms that wish to
establish 1:1 identity mapping between linux and hwirq space.

For insertion to specified ranges by platforms that do their own irq_desc
management:

	- irq_domain_associate()
	- irq_domain_associate_many()

These in turn call back in to the domain's ->map() routine, for further
processing by the platform. Disassociation of IRQs get handled through
irq_dispose_mapping() as normal.

With these in place it should be possible to begin migration of legacy IRQ
domains to linear ones, without requiring special handling for static vs
dynamic IRQ definitions in DT vs non-DT paths. This also makes it possible
for domains with static mappings to adopt whichever tree model best fits
their needs, rather than simply restricting them to linear revmaps.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
[grant.likely: Reorganized irq_domain_associate{,_many} to have all logic in one place]
[grant.likely: Add error checking for unallocated irq_descs at associate time]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/irqdomain.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 17b60be30fff..eab8a0e60b8e 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -149,12 +149,31 @@ static inline struct irq_domain *irq_domain_add_legacy_isa(
 
 extern void irq_domain_remove(struct irq_domain *host);
 
+extern int irq_domain_associate_many(struct irq_domain *domain,
+				     unsigned int irq_base,
+				     irq_hw_number_t hwirq_base, int count);
+static inline int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
+					irq_hw_number_t hwirq)
+{
+	return irq_domain_associate_many(domain, irq, hwirq, 1);
+}
+
 extern unsigned int irq_create_mapping(struct irq_domain *host,
 				       irq_hw_number_t hwirq);
 extern void irq_dispose_mapping(unsigned int virq);
 extern unsigned int irq_find_mapping(struct irq_domain *host,
 				     irq_hw_number_t hwirq);
 extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
+extern int irq_create_strict_mappings(struct irq_domain *domain,
+				      unsigned int irq_base,
+				      irq_hw_number_t hwirq_base, int count);
+
+static inline int irq_create_identity_mapping(struct irq_domain *host,
+					      irq_hw_number_t hwirq)
+{
+	return irq_create_strict_mappings(host, hwirq, hwirq, 1);
+}
+
 extern void irq_radix_revmap_insert(struct irq_domain *host, unsigned int virq,
 				    irq_hw_number_t hwirq);
 extern unsigned int irq_radix_revmap_lookup(struct irq_domain *host,
-- 
cgit v1.2.3


From d6b0d1f7058f7cf818138cd7fd116dca3f3576d9 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 3 Jun 2012 22:04:37 -0700
Subject: irqdomain: Eliminate dedicated radix lookup functions

In preparation to remove the slow revmap path, eliminate the public
radix revmap lookup functions.  This simplifies the code and makes the
slowpath removal patch a lot simpler.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/irqdomain.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index eab8a0e60b8e..0d5b17bf5e51 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -174,10 +174,6 @@ static inline int irq_create_identity_mapping(struct irq_domain *host,
 	return irq_create_strict_mappings(host, hwirq, hwirq, 1);
 }
 
-extern void irq_radix_revmap_insert(struct irq_domain *host, unsigned int virq,
-				    irq_hw_number_t hwirq);
-extern unsigned int irq_radix_revmap_lookup(struct irq_domain *host,
-					    irq_hw_number_t hwirq);
 extern unsigned int irq_linear_revmap(struct irq_domain *host,
 				      irq_hw_number_t hwirq);
 
-- 
cgit v1.2.3


From 775f4b297b780601e61787b766f306ed3e1d23eb Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 2 Jul 2012 07:52:16 -0400
Subject: random: make 'add_interrupt_randomness()' do something sane

We've been moving away from add_interrupt_randomness() for various
reasons: it's too expensive to do on every interrupt, and flooding the
CPU with interrupts could theoretically cause bogus floods of entropy
from a somewhat externally controllable source.

This solves both problems by limiting the actual randomness addition
to just once a second or after 64 interrupts, whicever comes first.
During that time, the interrupt cycle data is buffered up in a per-cpu
pool.  Also, we make sure the the nonblocking pool used by urandom is
initialized before we start feeding the normal input pool.  This
assures that /dev/urandom is returning unpredictable data as soon as
possible.

(Based on an original patch by Linus, but significantly modified by
tytso.)

Tested-by: Eric Wustrow <ewust@umich.edu>
Reported-by: Eric Wustrow <ewust@umich.edu>
Reported-by: Nadia Heninger <nadiah@cs.ucsd.edu>
Reported-by: Zakir Durumeric <zakir@umich.edu>
Reported-by: J. Alex Halderman <jhalderm@umich.edu>.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 include/linux/random.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 8f74538c96db..6ef39d7f2db1 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -52,7 +52,7 @@ extern void rand_initialize_irq(int irq);
 
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
-extern void add_interrupt_randomness(int irq);
+extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
-- 
cgit v1.2.3


From a2080a67abe9e314f9e9c2cc3a4a176e8a8f8793 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 4 Jul 2012 11:16:01 -0400
Subject: random: create add_device_randomness() interface

Add a new interface, add_device_randomness() for adding data to the
random pool that is likely to differ between two devices (or possibly
even per boot).  This would be things like MAC addresses or serial
numbers, or the read-out of the RTC. This does *not* add any actual
entropy to the pool, but it initializes the pool to different values
for devices that might otherwise be identical and have very little
entropy available to them (particularly common in the embedded world).

[ Modified by tytso to mix in a timestamp, since there may be some
  variability caused by the time needed to detect/configure the hardware
  in question. ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 include/linux/random.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 6ef39d7f2db1..e14b4387354a 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -50,6 +50,7 @@ struct rnd_state {
 
 extern void rand_initialize_irq(int irq);
 
+extern void add_device_randomness(const void *, unsigned int);
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
 extern void add_interrupt_randomness(int irq, int irq_flags);
-- 
cgit v1.2.3


From c2557a303ab6712bb6e09447df828c557c710ac9 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 5 Jul 2012 10:35:23 -0400
Subject: random: add new get_random_bytes_arch() function

Create a new function, get_random_bytes_arch() which will use the
architecture-specific hardware random number generator if it is
present.  Change get_random_bytes() to not use the HW RNG, even if it
is avaiable.

The reason for this is that the hw random number generator is fast (if
it is present), but it requires that we trust the hardware
manufacturer to have not put in a back door.  (For example, an
increasing counter encrypted by an AES key known to the NSA.)

It's unlikely that Intel (for example) was paid off by the US
Government to do this, but it's impossible for them to prove otherwise
--- especially since Bull Mountain is documented to use AES as a
whitener.  Hence, the output of an evil, trojan-horse version of
RDRAND is statistically indistinguishable from an RDRAND implemented
to the specifications claimed by Intel.  Short of using a tunnelling
electronic microscope to reverse engineer an Ivy Bridge chip and
disassembling and analyzing the CPU microcode, there's no way for us
to tell for sure.

Since users of get_random_bytes() in the Linux kernel need to be able
to support hardware systems where the HW RNG is not present, most
time-sensitive users of this interface have already created their own
cryptographic RNG interface which uses get_random_bytes() as a seed.
So it's much better to use the HW RNG to improve the existing random
number generator, by mixing in any entropy returned by the HW RNG into
/dev/random's entropy pool, but to always _use_ /dev/random's entropy
pool.

This way we get almost of the benefits of the HW RNG without any
potential liabilities.  The only benefits we forgo is the
speed/performance enhancements --- and generic kernel code can't
depend on depend on get_random_bytes() having the speed of a HW RNG
anyway.

For those places that really want access to the arch-specific HW RNG,
if it is available, we provide get_random_bytes_arch().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 include/linux/random.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index e14b4387354a..29e217a7e6d0 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -56,6 +56,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 
 #ifndef MODULE
-- 
cgit v1.2.3


From 72d0d248ca8232dbd30d35b42d0d86e39b3e322b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Mon, 16 Jul 2012 15:23:48 -0400
Subject: fuse: add FUSE_AUTO_INVAL_DATA init flag

FUSE_AUTO_INVAL_DATA is provided to enable updated/auto cache
invalidation logic.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 9303348965fb..e4a9d2af9aaa 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -57,6 +57,9 @@
  *
  * 7.19
  *  - add FUSE_FALLOCATE
+ *
+ * 7.20
+ *  - add FUSE_AUTO_INVAL_DATA
  */
 
 #ifndef _LINUX_FUSE_H
@@ -88,7 +91,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 19
+#define FUSE_KERNEL_MINOR_VERSION 20
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -167,6 +170,7 @@ struct fuse_file_lock {
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
  * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -176,6 +180,7 @@ struct fuse_file_lock {
 #define FUSE_BIG_WRITES		(1 << 5)
 #define FUSE_DONT_MASK		(1 << 6)
 #define FUSE_FLOCK_LOCKS	(1 << 10)
+#define FUSE_AUTO_INVAL_DATA	(1 << 12)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 69fe05c90ed58aac956dccb9e6d3a325fb3b8767 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 18 Jul 2012 16:09:40 +0200
Subject: fuse: add missing INIT flags

Add missing flags that userspace derived from the protocol version number.  This
makes the protocol more flexible.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index e4a9d2af9aaa..6455c5b64c2e 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -169,7 +169,11 @@ struct fuse_file_lock {
  * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
  * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
  * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
  */
 #define FUSE_ASYNC_READ		(1 << 0)
@@ -179,7 +183,11 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 #define FUSE_DONT_MASK		(1 << 6)
+#define FUSE_SPLICE_WRITE	(1 << 7)
+#define FUSE_SPLICE_MOVE	(1 << 8)
+#define FUSE_SPLICE_READ	(1 << 9)
 #define FUSE_FLOCK_LOCKS	(1 << 10)
+#define FUSE_HAS_IOCTL_DIR	(1 << 11)
 #define FUSE_AUTO_INVAL_DATA	(1 << 12)
 
 /**
-- 
cgit v1.2.3


From f3840dc0fb57aef120c5ee8241cdc9aaf3cec8d4 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 18 Jul 2012 16:09:40 +0200
Subject: fuse: add missing INIT flag descriptions

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 6455c5b64c2e..d8c713e148e3 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -166,8 +166,12 @@ struct fuse_file_lock {
 /**
  * INIT request/reply flags
  *
+ * FUSE_ASYNC_READ: asynchronous read requests
  * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
  * FUSE_SPLICE_WRITE: kernel supports splice write on the device
  * FUSE_SPLICE_MOVE: kernel supports splice move on the device
-- 
cgit v1.2.3


From c5857ccf293968348e5eb4ebedc68074de3dcda6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 14 Jul 2012 20:27:52 -0400
Subject: random: remove rand_initialize_irq()

With the new interrupt sampling system, we are no longer using the
timer_rand_state structure in the irq descriptor, so we can stop
initializing it now.

[ Merged in fixes from Sedat to find some last missing references to
  rand_initialize_irq() ]

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
---
 include/linux/irqdesc.h | 1 -
 include/linux/random.h  | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index f1e2527006bd..9a323d12de1c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -39,7 +39,6 @@ struct module;
  */
 struct irq_desc {
 	struct irq_data		irq_data;
-	struct timer_rand_state *timer_rand_state;
 	unsigned int __percpu	*kstat_irqs;
 	irq_flow_handler_t	handle_irq;
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
diff --git a/include/linux/random.h b/include/linux/random.h
index 29e217a7e6d0..ac621ce886ca 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -48,8 +48,6 @@ struct rnd_state {
 
 #ifdef __KERNEL__
 
-extern void rand_initialize_irq(int irq);
-
 extern void add_device_randomness(const void *, unsigned int);
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
-- 
cgit v1.2.3


From b4237003cff66084ebeb502412d9cee392e6f52f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 17 Jul 2012 14:20:51 -0400
Subject: random: final removal of IRQF_SAMPLE_RANDOM

The IRQF_SAMPLE_RANDOM flag is finally gone from the kernel tree, only
three years late.  :-)

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/interrupt.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e68a8e53bb59..c5f856a040b9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -42,7 +42,6 @@
  *
  * IRQF_DISABLED - keep irqs disabled when calling the action handler.
  *                 DEPRECATED. This flag is a NOOP and scheduled to be removed
- * IRQF_SAMPLE_RANDOM - irq is used to feed the random generator
  * IRQF_SHARED - allow sharing the irq among several devices
  * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur
  * IRQF_TIMER - Flag to mark this interrupt as timer interrupt
@@ -61,7 +60,6 @@
  *                resume time.
  */
 #define IRQF_DISABLED		0x00000020
-#define IRQF_SAMPLE_RANDOM	0x00000040
 #define IRQF_SHARED		0x00000080
 #define IRQF_PROBE_SHARED	0x00000100
 #define __IRQF_TIMER		0x00000200
-- 
cgit v1.2.3


From dc9b229a58dc0dfed34272ff26c6d5fd17c674e0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2012 19:29:45 +0200
Subject: genirq: Allow irq chips to mark themself oneshot safe

Some interrupt chips like MSI are oneshot safe by implementation. For
those interrupts we can avoid the mask/unmask sequence for threaded
interrupt handlers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1207132056540.32033@ionos
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Jan Kiszka <jan.kiszka@web.de>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 553fb66da130..216b0ba109d7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -349,6 +349,7 @@ enum {
 	IRQCHIP_MASK_ON_SUSPEND		= (1 <<  2),
 	IRQCHIP_ONOFFLINE_ENABLED	= (1 <<  3),
 	IRQCHIP_SKIP_SET_WAKE		= (1 <<  4),
+	IRQCHIP_ONESHOT_SAFE		= (1 <<  5),
 };
 
 /* This include will go away once we isolated irq_desc usage to core code */
-- 
cgit v1.2.3


From 6956dc568f34107f1d02b24f87efe7250803fc87 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@intel.com>
Date: Fri, 20 Jul 2012 14:19:50 +0800
Subject: sched/numa: Add SD_PERFER_SIBLING to CPU domain

Commit 8e7fbcbc22c ("sched: Remove stale power aware scheduling remnants
and dysfunctional knobs") removed SD_PERFER_SIBLING from the CPU domain.

On NUMA machines this causes that load_balance() doesn't perfer LCPU in
 same physical CPU package.

It causes some actual performance regressions on our NUMA machines from
Core2 to NHM and SNB.

Adding this domain flag again recovers the performance drop.

This change doesn't have any bad impact on any of my benchmarks:
 specjbb, kbuild, fio, hackbench .. etc, on all my machines.

Signed-off-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342765190-21540-1-git-send-email-alex.shi@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/topology.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index e91cd43394df..fec12d667211 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -164,6 +164,7 @@ int arch_update_cpu_topology(void);
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
+				| 1*SD_PREFER_SIBLING			\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
-- 
cgit v1.2.3


From a7e4786b937a3ae918a7520cfdba557a80915fa7 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 21 Jul 2012 00:54:59 +0530
Subject: sched: Fix comment about PREEMPT_ACTIVE bit location

PREEMPT_ACTIVE flag is bit 27, not 28. Fix the comment.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: paulmck@linux.vnet.ibm.com
Cc: josh@joshtriplett.org
Cc: rostedt@goodmis.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120720192459.6149.14821.stgit@srivatsabhat.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/hardirq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index bb7f30971858..305f23cd7cff 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -22,7 +22,7 @@
  *
  * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
  * - bit 26 is the NMI_MASK
- * - bit 28 is the PREEMPT_ACTIVE flag
+ * - bit 27 is the PREEMPT_ACTIVE flag
  *
  * PREEMPT_MASK: 0x000000ff
  * SOFTIRQ_MASK: 0x0000ff00
-- 
cgit v1.2.3


From 8fc23e032debd682f5ba9fc524a5846c10d2c522 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 26 Jul 2012 11:29:57 -0600
Subject: NVMe: Set block queue max sectors

Set the max hw sectors in a namespace's request queue if the nvme device
has a max data transfer size.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9490a00529f4..8c71d2004c6d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -37,6 +37,7 @@ struct nvme_bar {
 
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
 #define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
+#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)
 
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
-- 
cgit v1.2.3


From a0cadb85b8b758608ae0759151e29de7581c6731 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 27 Jul 2012 13:57:23 -0400
Subject: NVMe: Do not set IO queue depth beyond device max

Set the depth for IO queues to the device's maximum supported queue
entries if the requested depth exceeds the device's capabilities.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8c71d2004c6d..c25cccaa555a 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -35,6 +35,7 @@ struct nvme_bar {
 	__u64			acq;	/* Admin CQ Base Address */
 };
 
+#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
 #define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
 #define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)
-- 
cgit v1.2.3


From 921a1650de9eed40dd64d681aba4a4d98856f289 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Jul 2012 01:15:31 +0400
Subject: new helper: done_path_create()

releases what needs to be released after {kern,user}_path_create()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index d2ef8b34b967..4bf19d8174ed 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -67,6 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *);
 
 extern struct dentry *kern_path_create(int, const char *, struct path *, int);
 extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
-- 
cgit v1.2.3


From e4fad8e5d220e3dfb1050eee752ee5058f29a232 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 21 Jul 2012 15:33:25 +0400
Subject: consolidate pipe file creation

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h        | 3 ---
 include/linux/pipe_fs_i.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8fabb037a48d..478237844648 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2326,9 +2326,6 @@ static inline void i_readcount_inc(struct inode *inode)
 }
 #endif
 extern int do_pipe_flags(int *, int);
-extern struct file *create_read_pipe(struct file *f, int flags);
-extern struct file *create_write_pipe(int flags);
-extern void free_write_pipe(struct file *);
 
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
 extern struct file * open_exec(const char *);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e1ac1ce16fb0..e16dcb31f0c7 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -162,4 +162,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
 struct pipe_inode_info *get_pipe_info(struct file *file);
 
+int create_pipe_files(struct file **, int);
+
 #endif
-- 
cgit v1.2.3


From 800179c9b8a1e796e441674776d11cd4c05d61d7 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jul 2012 17:29:07 -0700
Subject: fs: add link restrictions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds symlink and hardlink restrictions to the Linux VFS.

Symlinks:

A long-standing class of security issues is the symlink-based
time-of-check-time-of-use race, most commonly seen in world-writable
directories like /tmp. The common method of exploitation of this flaw
is to cross privilege boundaries when following a given symlink (i.e. a
root process follows a symlink belonging to another user). For a likely
incomplete list of hundreds of examples across the years, please see:
http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp

The solution is to permit symlinks to only be followed when outside
a sticky world-writable directory, or when the uid of the symlink and
follower match, or when the directory owner matches the symlink's owner.

Some pointers to the history of earlier discussion that I could find:

 1996 Aug, Zygo Blaxell
  http://marc.info/?l=bugtraq&m=87602167419830&w=2
 1996 Oct, Andrew Tridgell
  http://lkml.indiana.edu/hypermail/linux/kernel/9610.2/0086.html
 1997 Dec, Albert D Cahalan
  http://lkml.org/lkml/1997/12/16/4
 2005 Feb, Lorenzo Hernández García-Hierro
  http://lkml.indiana.edu/hypermail/linux/kernel/0502.0/1896.html
 2010 May, Kees Cook
  https://lkml.org/lkml/2010/5/30/144

Past objections and rebuttals could be summarized as:

 - Violates POSIX.
   - POSIX didn't consider this situation and it's not useful to follow
     a broken specification at the cost of security.
 - Might break unknown applications that use this feature.
   - Applications that break because of the change are easy to spot and
     fix. Applications that are vulnerable to symlink ToCToU by not having
     the change aren't. Additionally, no applications have yet been found
     that rely on this behavior.
 - Applications should just use mkstemp() or O_CREATE|O_EXCL.
   - True, but applications are not perfect, and new software is written
     all the time that makes these mistakes; blocking this flaw at the
     kernel is a single solution to the entire class of vulnerability.
 - This should live in the core VFS.
   - This should live in an LSM. (https://lkml.org/lkml/2010/5/31/135)
 - This should live in an LSM.
   - This should live in the core VFS. (https://lkml.org/lkml/2010/8/2/188)

Hardlinks:

On systems that have user-writable directories on the same partition
as system files, a long-standing class of security issues is the
hardlink-based time-of-check-time-of-use race, most commonly seen in
world-writable directories like /tmp. The common method of exploitation
of this flaw is to cross privilege boundaries when following a given
hardlink (i.e. a root process follows a hardlink created by another
user). Additionally, an issue exists where users can "pin" a potentially
vulnerable setuid/setgid file so that an administrator will not actually
upgrade a system fully.

The solution is to permit hardlinks to only be created when the user is
already the existing file's owner, or if they already have read/write
access to the existing file.

Many Linux users are surprised when they learn they can link to files
they have no access to, so this change appears to follow the doctrine
of "least surprise". Additionally, this change does not violate POSIX,
which states "the implementation may require that the calling process
has permission to access the existing file"[1].

This change is known to break some implementations of the "at" daemon,
though the version used by Fedora and Ubuntu has been fixed[2] for
a while. Otherwise, the change has been undisruptive while in use in
Ubuntu for the last 1.5 years.

[1] http://pubs.opengroup.org/onlinepubs/9699919799/functions/linkat.html
[2] http://anonscm.debian.org/gitweb/?p=collab-maint/at.git;a=commitdiff;h=f4114656c3a6c6f6070e315ffdf940a49eda3279

This patch is based on the patches in Openwall and grsecurity, along with
suggestions from Al Viro. I have added a sysctl to enable the protected
behavior, and documentation.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 478237844648..80c819cbe272 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -437,6 +437,8 @@ extern unsigned long get_max_files(void);
 extern int sysctl_nr_open;
 extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
+extern int sysctl_protected_symlinks;
+extern int sysctl_protected_hardlinks;
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
-- 
cgit v1.2.3


From a51d9eaa41866ab6b4b6ecad7b621f8b66ece0dc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jul 2012 17:29:08 -0700
Subject: fs: add link restriction audit reporting

Adds audit messages for unexpected link restriction violations so that
system owners will have some sort of potentially actionable information
about misbehaving processes.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 22f292a917a3..36abf2aa7e68 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -130,6 +130,7 @@
 #define AUDIT_LAST_KERN_ANOM_MSG    1799
 #define AUDIT_ANOM_PROMISCUOUS      1700 /* Device changed promiscuous mode */
 #define AUDIT_ANOM_ABEND            1701 /* Process ended abnormally */
+#define AUDIT_ANOM_LINK		    1702 /* Suspicious use of file links */
 #define AUDIT_INTEGRITY_DATA	    1800 /* Data integrity verification */
 #define AUDIT_INTEGRITY_METADATA    1801 /* Metadata integrity verification */
 #define AUDIT_INTEGRITY_STATUS	    1802 /* Integrity enable status */
@@ -687,6 +688,8 @@ extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const struct path *path);
 extern void		    audit_log_key(struct audit_buffer *ab,
 					  char *key);
+extern void		    audit_log_link_denied(const char *operation,
+						  struct path *link);
 extern void		    audit_log_lost(const char *message);
 #ifdef CONFIG_SECURITY
 extern void 		    audit_log_secctx(struct audit_buffer *ab, u32 secid);
@@ -716,6 +719,7 @@ extern int audit_enabled;
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_d_path(b, p, d) do { ; } while (0)
 #define audit_log_key(b, k) do { ; } while (0)
+#define audit_log_link_denied(o, l) do { ; } while (0)
 #define audit_log_secctx(b,s) do { ; } while (0)
 #define audit_enabled 0
 #endif
-- 
cgit v1.2.3


From 4fcf1c6205fcfc7a226a144ae4d83b7f5415cab8 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Jun 2012 16:20:29 +0200
Subject: mm: Make default vm_ops provide ->page_mkwrite handler

Make default vm_ops provide ->page_mkwrite handler. Currently it only updates
file's modification times and gets locked page but later it will also handle
filesystem freezing.

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Tested-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Massimo Morana <massimo.morana@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b36d08ce5c57..15987d8e1d59 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1411,6 +1411,7 @@ extern void truncate_inode_pages_range(struct address_space *,
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
-- 
cgit v1.2.3


From 4a55c1017b8dcfd0554734ce3f19374d5b522d59 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Jun 2012 16:20:33 +0200
Subject: nfsd: Push mnt_want_write() outside of i_mutex

When mnt_want_write() starts to handle freezing it will get a full lock
semantics requiring proper lock ordering. So push mnt_want_write() call
consistently outside of i_mutex.

CC: linux-nfs@vger.kernel.org
CC: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/nfsd/nfsfh.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index ce4743a26015..fa63048fecff 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -143,6 +143,7 @@ typedef struct svc_fh {
 	int			fh_maxsize;	/* max size for fh_handle */
 
 	unsigned char		fh_locked;	/* inode locked by us */
+	unsigned char		fh_want_write;	/* remount protection taken */
 
 #ifdef CONFIG_NFSD_V3
 	unsigned char		fh_post_saved;	/* post-op attrs saved */
-- 
cgit v1.2.3


From bc7892c2142ce1eb1b0792337391414728cddf9e Mon Sep 17 00:00:00 2001
From: Michael Jones <michael.jones@matrix-vision.de>
Date: Thu, 26 Jul 2012 11:31:51 -0300
Subject: [media] omap3isp: #include videodev2.h in omap3isp.h

include/linux/omap3isp.h uses BASE_VIDIOC_PRIVATE from include/linux/videodev2.h
but didn't include this file.

Signed-off-by: Michael Jones <michael.jones@matrix-vision.de>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/omap3isp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/omap3isp.h b/include/linux/omap3isp.h
index c73a34c3434d..e7a79db3c1f7 100644
--- a/include/linux/omap3isp.h
+++ b/include/linux/omap3isp.h
@@ -28,6 +28,7 @@
 #define OMAP3_ISP_USER_H
 
 #include <linux/types.h>
+#include <linux/videodev2.h>
 
 /*
  * Private IOCTLs
-- 
cgit v1.2.3


From 660e22c7b008f1b21283cc2d786b2dd7c7790440 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Mon, 30 Jul 2012 05:08:47 -0300
Subject: [media] v4l: Add missing compatibility definitions for bounds
 rectangles

Compatibility defines for ACTUAL subdev selection rectangles were added and
also the name of the BOUNDS rectangles was changed in the process, which,
alas, went unnoticed until now. Add compatibility definitions for these
rectangles.

Signed-off-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/v4l2-common.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/v4l2-common.h b/include/linux/v4l2-common.h
index 0fa8b64c3cdb..4f0667e010dd 100644
--- a/include/linux/v4l2-common.h
+++ b/include/linux/v4l2-common.h
@@ -53,10 +53,10 @@
 /* Backward compatibility target definitions --- to be removed. */
 #define V4L2_SEL_TGT_CROP_ACTIVE	V4L2_SEL_TGT_CROP
 #define V4L2_SEL_TGT_COMPOSE_ACTIVE	V4L2_SEL_TGT_COMPOSE
-#define V4L2_SUBDEV_SEL_TGT_CROP_ACTUAL \
-	V4L2_SEL_TGT_CROP
-#define V4L2_SUBDEV_SEL_TGT_COMPOSE_ACTUAL \
-	V4L2_SEL_TGT_COMPOSE
+#define V4L2_SUBDEV_SEL_TGT_CROP_ACTUAL	V4L2_SEL_TGT_CROP
+#define V4L2_SUBDEV_SEL_TGT_COMPOSE_ACTUAL V4L2_SEL_TGT_COMPOSE
+#define V4L2_SUBDEV_SEL_TGT_CROP_BOUNDS	V4L2_SEL_TGT_CROP_BOUNDS
+#define V4L2_SUBDEV_SEL_TGT_COMPOSE_BOUNDS V4L2_SEL_TGT_COMPOSE_BOUNDS
 
 /* Selection flags */
 #define V4L2_SEL_FLAG_GE		(1 << 0)
-- 
cgit v1.2.3


From 5accdf82ba25cacefd6c1867f1704beb4d244cdd Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Jun 2012 16:20:34 +0200
Subject: fs: Improve filesystem freezing handling

vfs_check_frozen() tests are racy since the filesystem can be frozen just after
the test is performed. Thus in write paths we can end up marking some pages or
inodes dirty even though the file system is already frozen. This creates
problems with flusher thread hanging on frozen filesystem.

Another problem is that exclusion between ->page_mkwrite() and filesystem
freezing has been handled by setting page dirty and then verifying s_frozen.
This guaranteed that either the freezing code sees the faulted page, writes it,
and writeprotects it again or we see s_frozen set and bail out of page fault.
This works to protect from page being marked writeable while filesystem
freezing is running but has an unpleasant artefact of leaving dirty (although
unmodified and writeprotected) pages on frozen filesystem resulting in similar
problems with flusher thread as the first problem.

This patch aims at providing exclusion between write paths and filesystem
freezing. We implement a writer-freeze read-write semaphore in the superblock.
Actually, there are three such semaphores because of lock ranking reasons - one
for page fault handlers (->page_mkwrite), one for all other writers, and one of
internal filesystem purposes (used e.g. to track running transactions).  Write
paths which should block freezing (e.g. directory operations, ->aio_write(),
->page_mkwrite) hold reader side of the semaphore. Code freezing the filesystem
takes the writer side.

Only that we don't really want to bounce cachelines of the semaphores between
CPUs for each write happening. So we implement the reader side of the semaphore
as a per-cpu counter and the writer side is implemented using s_writers.frozen
superblock field.

[AV: microoptimize sb_start_write(); we want it fast in normal case]

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Tested-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Massimo Morana <massimo.morana@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 143 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 80c819cbe272..aefed9426b03 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -412,6 +412,7 @@ struct inodes_stat_t {
 #include <linux/shrinker.h>
 #include <linux/migrate_mode.h>
 #include <linux/uidgid.h>
+#include <linux/lockdep.h>
 
 #include <asm/byteorder.h>
 
@@ -1439,6 +1440,8 @@ extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
 
+struct mm_struct;
+
 /*
  *	Umount options
  */
@@ -1452,6 +1455,32 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
+/* Possible states of 'frozen' field */
+enum {
+	SB_UNFROZEN = 0,		/* FS is unfrozen */
+	SB_FREEZE_WRITE	= 1,		/* Writes, dir ops, ioctls frozen */
+	SB_FREEZE_TRANS = 2,
+	SB_FREEZE_PAGEFAULT = 2,	/* Page faults stopped as well */
+	SB_FREEZE_FS = 3,		/* For internal FS use (e.g. to stop
+					 * internal threads if needed) */
+	SB_FREEZE_COMPLETE = 4,		/* ->freeze_fs finished successfully */
+};
+
+#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
+
+struct sb_writers {
+	/* Counters for counting writers at each level */
+	struct percpu_counter	counter[SB_FREEZE_LEVELS];
+	wait_queue_head_t	wait;		/* queue for waiting for
+						   writers / faults to finish */
+	int			frozen;		/* Is sb frozen? */
+	wait_queue_head_t	wait_unfrozen;	/* queue for waiting for
+						   sb to be thawed */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	lock_map[SB_FREEZE_LEVELS];
+#endif
+};
+
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
 	dev_t			s_dev;		/* search index; _not_ kdev_t */
@@ -1501,6 +1530,7 @@ struct super_block {
 
 	int			s_frozen;
 	wait_queue_head_t	s_wait_unfrozen;
+	struct sb_writers	s_writers;
 
 	char s_id[32];				/* Informational name */
 	u8 s_uuid[16];				/* UUID */
@@ -1555,14 +1585,119 @@ extern struct timespec current_fs_time(struct super_block *sb);
 /*
  * Snapshotting support.
  */
-enum {
-	SB_UNFROZEN = 0,
-	SB_FREEZE_WRITE	= 1,
-	SB_FREEZE_TRANS = 2,
-};
+/* Will go away when all users are converted */
+#define vfs_check_frozen(sb, level) do { } while (0)
+
+void __sb_end_write(struct super_block *sb, int level);
+int __sb_start_write(struct super_block *sb, int level, bool wait);
+
+/**
+ * sb_end_write - drop write access to a superblock
+ * @sb: the super we wrote to
+ *
+ * Decrement number of writers to the filesystem. Wake up possible waiters
+ * wanting to freeze the filesystem.
+ */
+static inline void sb_end_write(struct super_block *sb)
+{
+	__sb_end_write(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_end_pagefault - drop write access to a superblock from a page fault
+ * @sb: the super we wrote to
+ *
+ * Decrement number of processes handling write page fault to the filesystem.
+ * Wake up possible waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_pagefault(struct super_block *sb)
+{
+	__sb_end_write(sb, SB_FREEZE_PAGEFAULT);
+}
+
+/**
+ * sb_end_intwrite - drop write access to a superblock for internal fs purposes
+ * @sb: the super we wrote to
+ *
+ * Decrement fs-internal number of writers to the filesystem.  Wake up possible
+ * waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_intwrite(struct super_block *sb)
+{
+	__sb_end_write(sb, SB_FREEZE_FS);
+}
+
+/**
+ * sb_start_write - get write access to a superblock
+ * @sb: the super we write to
+ *
+ * When a process wants to write data or metadata to a file system (i.e. dirty
+ * a page or an inode), it should embed the operation in a sb_start_write() -
+ * sb_end_write() pair to get exclusion against file system freezing. This
+ * function increments number of writers preventing freezing. If the file
+ * system is already frozen, the function waits until the file system is
+ * thawed.
+ *
+ * Since freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. Generally,
+ * freeze protection should be the outermost lock. In particular, we have:
+ *
+ * sb_start_write
+ *   -> i_mutex			(write path, truncate, directory ops, ...)
+ *   -> s_umount		(freeze_super, thaw_super)
+ */
+static inline void sb_start_write(struct super_block *sb)
+{
+	__sb_start_write(sb, SB_FREEZE_WRITE, true);
+}
+
+static inline int sb_start_write_trylock(struct super_block *sb)
+{
+	return __sb_start_write(sb, SB_FREEZE_WRITE, false);
+}
+
+/**
+ * sb_start_pagefault - get write access to a superblock from a page fault
+ * @sb: the super we write to
+ *
+ * When a process starts handling write page fault, it should embed the
+ * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
+ * exclusion against file system freezing. This is needed since the page fault
+ * is going to dirty a page. This function increments number of running page
+ * faults preventing freezing. If the file system is already frozen, the
+ * function waits until the file system is thawed.
+ *
+ * Since page fault freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. It is advised to
+ * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
+ * handling code implies lock dependency:
+ *
+ * mmap_sem
+ *   -> sb_start_pagefault
+ */
+static inline void sb_start_pagefault(struct super_block *sb)
+{
+	__sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
+}
+
+/*
+ * sb_start_intwrite - get write access to a superblock for internal fs purposes
+ * @sb: the super we write to
+ *
+ * This is the third level of protection against filesystem freezing. It is
+ * free for use by a filesystem. The only requirement is that it must rank
+ * below sb_start_pagefault.
+ *
+ * For example filesystem can call sb_start_intwrite() when starting a
+ * transaction which somewhat eases handling of freezing for internal sources
+ * of filesystem changes (internal fs threads, discarding preallocation on file
+ * close, etc.).
+ */
+static inline void sb_start_intwrite(struct super_block *sb)
+{
+	__sb_start_write(sb, SB_FREEZE_FS, true);
+}
 
-#define vfs_check_frozen(sb, level) \
-	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
@@ -1886,6 +2021,7 @@ struct file_system_type {
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
 	struct lock_class_key s_vfs_rename_key;
+	struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
 
 	struct lock_class_key i_lock_key;
 	struct lock_class_key i_mutex_key;
-- 
cgit v1.2.3


From d9c95bdd53a8d9116d269c91ce3d151472e6bcd6 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Jun 2012 16:20:47 +0200
Subject: fs: Remove old freezing mechanism

Now that all users are converted, we can remove functions, variables, and
constants defined by the old freezing mechanism.

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Tested-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Massimo Morana <massimo.morana@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index aefed9426b03..0f4b79be8717 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1459,7 +1459,6 @@ extern spinlock_t sb_lock;
 enum {
 	SB_UNFROZEN = 0,		/* FS is unfrozen */
 	SB_FREEZE_WRITE	= 1,		/* Writes, dir ops, ioctls frozen */
-	SB_FREEZE_TRANS = 2,
 	SB_FREEZE_PAGEFAULT = 2,	/* Page faults stopped as well */
 	SB_FREEZE_FS = 3,		/* For internal FS use (e.g. to stop
 					 * internal threads if needed) */
@@ -1528,8 +1527,6 @@ struct super_block {
 	struct hlist_node	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
-	int			s_frozen;
-	wait_queue_head_t	s_wait_unfrozen;
 	struct sb_writers	s_writers;
 
 	char s_id[32];				/* Informational name */
@@ -1585,8 +1582,6 @@ extern struct timespec current_fs_time(struct super_block *sb);
 /*
  * Snapshotting support.
  */
-/* Will go away when all users are converted */
-#define vfs_check_frozen(sb, level) do { } while (0)
 
 void __sb_end_write(struct super_block *sb, int level);
 int __sb_start_write(struct super_block *sb, int level, bool wait);
-- 
cgit v1.2.3


From 9cbb17508808f8a6bdd83354b61e126ac4fa6fed Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 31 Jul 2012 09:08:14 +0200
Subject: blk: centralize non-request unplug handling.

Both md and umem has similar code for getting notified on an
blk_finish_plug event.
Centralize this code in block/ and allow each driver to
provide its distinctive difference.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3816ce8a08fc..607ca228f47e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -922,11 +922,15 @@ struct blk_plug {
 };
 #define BLK_MAX_REQUEST_COUNT 16
 
+struct blk_plug_cb;
+typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *);
 struct blk_plug_cb {
 	struct list_head list;
-	void (*callback)(struct blk_plug_cb *);
+	blk_plug_cb_fn callback;
+	void *data;
 };
-
+extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
+					     void *data, int size);
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
 extern void blk_flush_plug_list(struct blk_plug *, bool);
-- 
cgit v1.2.3


From 74018dc3063a2c729fc73041c0a9f03aac995920 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 31 Jul 2012 09:08:15 +0200
Subject: blk: pass from_schedule to non-request unplug functions.

This will allow md/raid to know why the unplug was called,
and will be able to act according - if !from_schedule it
is safe to perform tasks which could themselves schedule.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 607ca228f47e..4e72a9d48232 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -923,7 +923,7 @@ struct blk_plug {
 #define BLK_MAX_REQUEST_COUNT 16
 
 struct blk_plug_cb;
-typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *);
+typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
 struct blk_plug_cb {
 	struct list_head list;
 	blk_plug_cb_fn callback;
-- 
cgit v1.2.3


From 7bedaa5537604f34d1d63c5ec7891e559d2a61ed Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 13 Apr 2012 12:10:24 +0100
Subject: dmaengine: add OMAP DMA engine driver

Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/omap-dma.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 include/linux/omap-dma.h

(limited to 'include/linux')

diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
new file mode 100644
index 000000000000..eb475a8ea25b
--- /dev/null
+++ b/include/linux/omap-dma.h
@@ -0,0 +1,22 @@
+/*
+ * OMAP DMA Engine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_OMAP_DMA_H
+#define __LINUX_OMAP_DMA_H
+
+struct dma_chan;
+
+#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
+bool omap_dma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
+{
+	return false;
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From 0f26d0e0a715556270d85b7946b99546a2f92888 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 30 Jul 2012 22:44:41 -0500
Subject: kdb: Remove unused KDB_FLAG_ONLY_DO_DUMP

This code cleanup was missed in the original kdb merge, and this code
is simply not used at all.  The code that was previously used to set
the KDB_FLAG_ONLY_DO_DUMP was removed prior to the initial kdb merge.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kdb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 064725854db8..42d9e863a313 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -75,8 +75,6 @@ extern const char *kdb_diemsg;
 #define KDB_FLAG_CATASTROPHIC	(1 << 1) /* A catastrophic event has occurred */
 #define KDB_FLAG_CMD_INTERRUPT	(1 << 2) /* Previous command was interrupted */
 #define KDB_FLAG_NOIPI		(1 << 3) /* Do not send IPIs */
-#define KDB_FLAG_ONLY_DO_DUMP	(1 << 4) /* Only do a dump, used when
-					  * kdb is off */
 #define KDB_FLAG_NO_CONSOLE	(1 << 5) /* No console is available,
 					  * kdb is disabled */
 #define KDB_FLAG_NO_VT_CONSOLE	(1 << 6) /* No VT console is available, do
-- 
cgit v1.2.3


From cba3345cc494ad286ca8823f44b2c16cae496679 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 31 Jul 2012 08:16:22 -0600
Subject: vfio: VFIO core

VFIO is a secure user level driver for use with both virtual machines
and user level drivers.  VFIO makes use of IOMMU groups to ensure the
isolation of devices in use, allowing unprivileged user access.  It's
intended that VFIO will replace KVM device assignment and UIO drivers
(in cases where the target platform includes a sufficiently capable
IOMMU).

New in this version of VFIO is support for IOMMU groups managed
through the IOMMU core as well as a rework of the API, removing the
group merge interface.  We now go back to a model more similar to
original VFIO with UIOMMU support where the file descriptor obtained
from /dev/vfio/vfio allows access to the IOMMU, but only after a
group is added, avoiding the previous privilege issues with this type
of model.  IOMMU support is also now fully modular as IOMMUs have
vastly different interface requirements on different platforms.  VFIO
users are able to query and initialize the IOMMU model of their
choice.

Please see the follow-on Documentation commit for further description
and usage example.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 367 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 367 insertions(+)
 create mode 100644 include/linux/vfio.h

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
new file mode 100644
index 000000000000..03e56a5154b6
--- /dev/null
+++ b/include/linux/vfio.h
@@ -0,0 +1,367 @@
+/*
+ * VFIO API definition
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef VFIO_H
+#define VFIO_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define VFIO_API_VERSION	0
+
+#ifdef __KERNEL__	/* Internal VFIO-core/bus driver API */
+
+#include <linux/iommu.h>
+#include <linux/mm.h>
+
+/**
+ * struct vfio_device_ops - VFIO bus driver device callbacks
+ *
+ * @open: Called when userspace creates new file descriptor for device
+ * @release: Called when userspace releases file descriptor for device
+ * @read: Perform read(2) on device file descriptor
+ * @write: Perform write(2) on device file descriptor
+ * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
+ *         operations documented below
+ * @mmap: Perform mmap(2) on a region of the device file descriptor
+ */
+struct vfio_device_ops {
+	char	*name;
+	int	(*open)(void *device_data);
+	void	(*release)(void *device_data);
+	ssize_t	(*read)(void *device_data, char __user *buf,
+			size_t count, loff_t *ppos);
+	ssize_t	(*write)(void *device_data, const char __user *buf,
+			 size_t count, loff_t *size);
+	long	(*ioctl)(void *device_data, unsigned int cmd,
+			 unsigned long arg);
+	int	(*mmap)(void *device_data, struct vm_area_struct *vma);
+};
+
+extern int vfio_add_group_dev(struct device *dev,
+			      const struct vfio_device_ops *ops,
+			      void *device_data);
+
+extern void *vfio_del_group_dev(struct device *dev);
+
+/**
+ * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
+ */
+struct vfio_iommu_driver_ops {
+	char		*name;
+	struct module	*owner;
+	void		*(*open)(unsigned long arg);
+	void		(*release)(void *iommu_data);
+	ssize_t		(*read)(void *iommu_data, char __user *buf,
+				size_t count, loff_t *ppos);
+	ssize_t		(*write)(void *iommu_data, const char __user *buf,
+				 size_t count, loff_t *size);
+	long		(*ioctl)(void *iommu_data, unsigned int cmd,
+				 unsigned long arg);
+	int		(*mmap)(void *iommu_data, struct vm_area_struct *vma);
+	int		(*attach_group)(void *iommu_data,
+					struct iommu_group *group);
+	void		(*detach_group)(void *iommu_data,
+					struct iommu_group *group);
+
+};
+
+extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
+
+extern void vfio_unregister_iommu_driver(
+				const struct vfio_iommu_driver_ops *ops);
+
+/**
+ * offsetofend(TYPE, MEMBER)
+ *
+ * @TYPE: The type of the structure
+ * @MEMBER: The member within the structure to get the end offset of
+ *
+ * Simple helper macro for dealing with variable sized structures passed
+ * from user space.  This allows us to easily determine if the provided
+ * structure is sized to include various fields.
+ */
+#define offsetofend(TYPE, MEMBER) ({				\
+	TYPE tmp;						\
+	offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); })		\
+
+#endif /* __KERNEL__ */
+
+/* Kernel & User level defines for VFIO IOCTLs. */
+
+/* Extensions */
+
+/* None yet */
+
+/*
+ * The IOCTL interface is designed for extensibility by embedding the
+ * structure length (argsz) and flags into structures passed between
+ * kernel and userspace.  We therefore use the _IO() macro for these
+ * defines to avoid implicitly embedding a size into the ioctl request.
+ * As structure fields are added, argsz will increase to match and flag
+ * bits will be defined to indicate additional fields with valid data.
+ * It's *always* the caller's responsibility to indicate the size of
+ * the structure passed by setting argsz appropriately.
+ */
+
+#define VFIO_TYPE	(';')
+#define VFIO_BASE	100
+
+/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
+
+/**
+ * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
+ *
+ * Report the version of the VFIO API.  This allows us to bump the entire
+ * API version should we later need to add or change features in incompatible
+ * ways.
+ * Return: VFIO_API_VERSION
+ * Availability: Always
+ */
+#define VFIO_GET_API_VERSION		_IO(VFIO_TYPE, VFIO_BASE + 0)
+
+/**
+ * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
+ *
+ * Check whether an extension is supported.
+ * Return: 0 if not supported, 1 (or some other positive integer) if supported.
+ * Availability: Always
+ */
+#define VFIO_CHECK_EXTENSION		_IO(VFIO_TYPE, VFIO_BASE + 1)
+
+/**
+ * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
+ *
+ * Set the iommu to the given type.  The type must be supported by an
+ * iommu driver as verified by calling CHECK_EXTENSION using the same
+ * type.  A group must be set to this file descriptor before this
+ * ioctl is available.  The IOMMU interfaces enabled by this call are
+ * specific to the value set.
+ * Return: 0 on success, -errno on failure
+ * Availability: When VFIO group attached
+ */
+#define VFIO_SET_IOMMU			_IO(VFIO_TYPE, VFIO_BASE + 2)
+
+/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
+
+/**
+ * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
+ *						struct vfio_group_status)
+ *
+ * Retrieve information about the group.  Fills in provided
+ * struct vfio_group_info.  Caller sets argsz.
+ * Return: 0 on succes, -errno on failure.
+ * Availability: Always
+ */
+struct vfio_group_status {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_GROUP_FLAGS_VIABLE		(1 << 0)
+#define VFIO_GROUP_FLAGS_CONTAINER_SET	(1 << 1)
+};
+#define VFIO_GROUP_GET_STATUS		_IO(VFIO_TYPE, VFIO_BASE + 3)
+
+/**
+ * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
+ *
+ * Set the container for the VFIO group to the open VFIO file
+ * descriptor provided.  Groups may only belong to a single
+ * container.  Containers may, at their discretion, support multiple
+ * groups.  Only when a container is set are all of the interfaces
+ * of the VFIO file descriptor and the VFIO group file descriptor
+ * available to the user.
+ * Return: 0 on success, -errno on failure.
+ * Availability: Always
+ */
+#define VFIO_GROUP_SET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 4)
+
+/**
+ * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
+ *
+ * Remove the group from the attached container.  This is the
+ * opposite of the SET_CONTAINER call and returns the group to
+ * an initial state.  All device file descriptors must be released
+ * prior to calling this interface.  When removing the last group
+ * from a container, the IOMMU will be disabled and all state lost,
+ * effectively also returning the VFIO file descriptor to an initial
+ * state.
+ * Return: 0 on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_UNSET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 5)
+
+/**
+ * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
+ *
+ * Return a new file descriptor for the device object described by
+ * the provided string.  The string should match a device listed in
+ * the devices subdirectory of the IOMMU group sysfs entry.  The
+ * group containing the device must already be added to this context.
+ * Return: new file descriptor on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_GET_DEVICE_FD	_IO(VFIO_TYPE, VFIO_BASE + 6)
+
+/* --------------- IOCTLs for DEVICE file descriptors --------------- */
+
+/**
+ * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
+ *						struct vfio_device_info)
+ *
+ * Retrieve information about the device.  Fills in provided
+ * struct vfio_device_info.  Caller sets argsz.
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
+	__u32	num_regions;	/* Max region index + 1 */
+	__u32	num_irqs;	/* Max IRQ index + 1 */
+};
+#define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
+
+/**
+ * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
+ *				       struct vfio_region_info)
+ *
+ * Retrieve information about a device region.  Caller provides
+ * struct vfio_region_info with index value set.  Caller sets argsz.
+ * Implementation of region mapping is bus driver specific.  This is
+ * intended to describe MMIO, I/O port, as well as bus specific
+ * regions (ex. PCI config space).  Zero sized regions may be used
+ * to describe unimplemented regions (ex. unimplemented PCI BARs).
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_region_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_REGION_INFO_FLAG_READ	(1 << 0) /* Region supports read */
+#define VFIO_REGION_INFO_FLAG_WRITE	(1 << 1) /* Region supports write */
+#define VFIO_REGION_INFO_FLAG_MMAP	(1 << 2) /* Region supports mmap */
+	__u32	index;		/* Region index */
+	__u32	resv;		/* Reserved for alignment */
+	__u64	size;		/* Region size (bytes) */
+	__u64	offset;		/* Region offset from start of device fd */
+};
+#define VFIO_DEVICE_GET_REGION_INFO	_IO(VFIO_TYPE, VFIO_BASE + 8)
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ *				    struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ.  Caller provides
+ * struct vfio_irq_info with index value set.  Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific.  Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks.  Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flags indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts.  This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are setup as a set and new subindexes cannot be enabled without first
+ * disabling the entire index.  This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront.  In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
+ */
+struct vfio_irq_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_INFO_EVENTFD		(1 << 0)
+#define VFIO_IRQ_INFO_MASKABLE		(1 << 1)
+#define VFIO_IRQ_INFO_AUTOMASKED	(1 << 2)
+#define VFIO_IRQ_INFO_NORESIZE		(1 << 3)
+	__u32	index;		/* IRQ index */
+	__u32	count;		/* Number of IRQs within this index */
+};
+#define VFIO_DEVICE_GET_IRQ_INFO	_IO(VFIO_TYPE, VFIO_BASE + 9)
+
+/**
+ * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
+ *
+ * Set signaling, masking, and unmasking of interrupts.  Caller provides
+ * struct vfio_irq_set with all fields set.  'start' and 'count' indicate
+ * the range of subindexes being specified.
+ *
+ * The DATA flags specify the type of data provided.  If DATA_NONE, the
+ * operation performs the specified action immediately on the specified
+ * interrupt(s).  For example, to unmask AUTOMASKED interrupt [0,0]:
+ * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
+ *
+ * DATA_BOOL allows sparse support for the same on arrays of interrupts.
+ * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
+ * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
+ * data = {1,0,1}
+ *
+ * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
+ * A value of -1 can be used to either de-assign interrupts if already
+ * assigned or skip un-assigned interrupts.  For example, to set an eventfd
+ * to be trigger for interrupts [0,0] and [0,2]:
+ * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
+ * data = {fd1, -1, fd2}
+ * If index [0,1] is previously set, two count = 1 ioctls calls would be
+ * required to set [0,0] and [0,2] without changing [0,1].
+ *
+ * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
+ * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
+ * from userspace (ie. simulate hardware triggering).
+ *
+ * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
+ * enables the interrupt index for the device.  Individual subindex interrupts
+ * can be disabled using the -1 value for DATA_EVENTFD or the index can be
+ * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
+ *
+ * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
+ * ACTION_TRIGGER specifies kernel->user signaling.
+ */
+struct vfio_irq_set {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_SET_DATA_NONE		(1 << 0) /* Data not present */
+#define VFIO_IRQ_SET_DATA_BOOL		(1 << 1) /* Data is bool (u8) */
+#define VFIO_IRQ_SET_DATA_EVENTFD	(1 << 2) /* Data is eventfd (s32) */
+#define VFIO_IRQ_SET_ACTION_MASK	(1 << 3) /* Mask interrupt */
+#define VFIO_IRQ_SET_ACTION_UNMASK	(1 << 4) /* Unmask interrupt */
+#define VFIO_IRQ_SET_ACTION_TRIGGER	(1 << 5) /* Trigger interrupt */
+	__u32	index;
+	__u32	start;
+	__u32	count;
+	__u8	data[];
+};
+#define VFIO_DEVICE_SET_IRQS		_IO(VFIO_TYPE, VFIO_BASE + 10)
+
+#define VFIO_IRQ_SET_DATA_TYPE_MASK	(VFIO_IRQ_SET_DATA_NONE | \
+					 VFIO_IRQ_SET_DATA_BOOL | \
+					 VFIO_IRQ_SET_DATA_EVENTFD)
+#define VFIO_IRQ_SET_ACTION_TYPE_MASK	(VFIO_IRQ_SET_ACTION_MASK | \
+					 VFIO_IRQ_SET_ACTION_UNMASK | \
+					 VFIO_IRQ_SET_ACTION_TRIGGER)
+/**
+ * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
+ *
+ * Reset a device.
+ */
+#define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
+
+#endif /* VFIO_H */
-- 
cgit v1.2.3


From 73fa0d10d077d9521ee2dace2307ae2c9a965336 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 31 Jul 2012 08:16:23 -0600
Subject: vfio: Type1 IOMMU implementation

This VFIO IOMMU backend is designed primarily for AMD-Vi and Intel
VT-d hardware, but is potentially usable by anything supporting
similar mapping functionality.  We arbitrarily call this a Type1
backend for lack of a better name.  This backend has no IOVA
or host memory mapping restrictions for the user and is optimized
for relatively static mappings.  Mapped areas are pinned into system
memory.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 03e56a5154b6..acb046fd5b70 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,7 +98,7 @@ extern void vfio_unregister_iommu_driver(
 
 /* Extensions */
 
-/* None yet */
+#define VFIO_TYPE1_IOMMU		1
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
@@ -364,4 +364,56 @@ struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/* -------- API for Type1 VFIO IOMMU -------- */
+
+/**
+ * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
+ *
+ * Retrieve information about the IOMMU object. Fills in provided
+ * struct vfio_iommu_info. Caller sets argsz.
+ *
+ * XXX Should we do these by CHECK_EXTENSION too?
+ */
+struct vfio_iommu_type1_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
+	__u64	iova_pgsizes;		/* Bitmap of supported page sizes */
+};
+
+#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
+ *
+ * Map process virtual addresses to IO virtual addresses using the
+ * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ */
+struct vfio_iommu_type1_dma_map {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
+#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
+	__u64	vaddr;				/* Process virtual address */
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/**
+ * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_dma_unmap)
+ *
+ * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
+ * Caller sets argsz.
+ */
+struct vfio_iommu_type1_dma_unmap {
+	__u32	argsz;
+	__u32	flags;
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
 #endif /* VFIO_H */
-- 
cgit v1.2.3


From 89e1f7d4c66d85f42c3d52ea3866eb10cadf6153 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 31 Jul 2012 08:16:24 -0600
Subject: vfio: Add PCI device driver

Add PCI device support for VFIO.  PCI devices expose regions
for accessing config space, I/O port space, and MMIO areas
of the device.  PCI config access is virtualized in the kernel,
allowing us to ensure the integrity of the system, by preventing
various accesses while reducing duplicate support across various
userspace drivers.  I/O port supports read/write access while
MMIO also supports mmap of sufficiently sized regions.  Support
for INTx, MSI, and MSI-X interrupts are provided using eventfds to
userspace.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index acb046fd5b70..0a4f180a11d8 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -223,6 +223,7 @@ struct vfio_device_info {
 	__u32	argsz;
 	__u32	flags;
 #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
+#define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
@@ -364,6 +365,31 @@ struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/*
+ * The VFIO-PCI bus driver makes use of the following fixed region and
+ * IRQ index mapping.  Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_PCI_BAR0_REGION_INDEX,
+	VFIO_PCI_BAR1_REGION_INDEX,
+	VFIO_PCI_BAR2_REGION_INDEX,
+	VFIO_PCI_BAR3_REGION_INDEX,
+	VFIO_PCI_BAR4_REGION_INDEX,
+	VFIO_PCI_BAR5_REGION_INDEX,
+	VFIO_PCI_ROM_REGION_INDEX,
+	VFIO_PCI_CONFIG_REGION_INDEX,
+	VFIO_PCI_NUM_REGIONS
+};
+
+enum {
+	VFIO_PCI_INTX_IRQ_INDEX,
+	VFIO_PCI_MSI_IRQ_INDEX,
+	VFIO_PCI_MSIX_IRQ_INDEX,
+	VFIO_PCI_NUM_IRQS
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
cgit v1.2.3


From e6dab5ffab59e910ec0e3355f4a6f29f7a7be474 Mon Sep 17 00:00:00 2001
From: Andrew Vagin <avagin@openvz.org>
Date: Wed, 11 Jul 2012 18:14:58 +0400
Subject: perf/trace: Add ability to set a target task for events

A few events are interesting not only for a current task.
For example, sched_stat_* events are interesting for a task
which wakes up. For this reason, it will be good if such
events will be delivered to a target task too.

Now a target task can be set by using __perf_task().

The original idea and a draft patch belongs to Peter Zijlstra.

I need these events for profiling sleep times. sched_switch is used for
getting callchains and sched_stat_* is used for getting time periods.
These events are combined in user space, then it can be analyzed by
perf tools.

Inspired-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arun Sharma <asharma@fb.com>
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342016098-213063-1-git-send-email-avagin@openvz.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ftrace_event.h | 5 +++--
 include/linux/perf_event.h   | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index af961d6f7ab1..642928cf57b4 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,
 
 static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
-		       u64 count, struct pt_regs *regs, void *head)
+		       u64 count, struct pt_regs *regs, void *head,
+		       struct task_struct *task)
 {
-	perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
+	perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
 }
 #endif
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 76c5c8b724a7..7602ccb3f40e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void)
 extern void perf_event_init(void);
 extern void perf_tp_event(u64 addr, u64 count, void *record,
 			  int entry_size, struct pt_regs *regs,
-			  struct hlist_head *head, int rctx);
+			  struct hlist_head *head, int rctx,
+			  struct task_struct *task);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
-- 
cgit v1.2.3


From a7ea3bbf5d58f4df2265d312f91d5769eabc8144 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 27 Jul 2012 14:48:09 -0400
Subject: time/jiffies: Allow CLOCK_TICK_RATE to be undefined

CLOCK_TICK_RATE is a legacy constant that defines the timer
device's granularity. On hardware with particularly coarse
granularity, this constant is used to reduce accumulated
time error when using jiffies as a clocksource, by calculating
the hardware's actual tick length rather then just assuming
it is 1sec/HZ.

However, for the most part this is unnecessary, as most modern
systems don't use jiffies for their clocksource, and their
tick device is sufficiently fine grained to avoid major error.

Thus, this patch allows an architecture to not define
CLOCK_TICK_RATE, in which case ACTHZ defaults to (HZ << 8).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[ Commit log & intention tweaks ]
Signed-off-by: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/1343414893-45779-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jiffies.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 265e2c3cbd1c..7d24466fb80b 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -39,9 +39,6 @@
 # error Invalid value of HZ.
 #endif
 
-/* LATCH is used in the interval timer and ftape setup. */
-#define LATCH  ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
-
 /* Suppose we want to divide two numbers NOM and DEN: NOM/DEN, then we can
  * improve accuracy by shifting LSH bits, hence calculating:
  *     (NOM << LSH) / DEN
@@ -54,8 +51,15 @@
 #define SH_DIV(NOM,DEN,LSH) (   (((NOM) / (DEN)) << (LSH))              \
                              + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))
 
+#ifdef CLOCK_TICK_RATE
+/* LATCH is used in the interval timer and ftape setup. */
+# define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
+
 /* HZ is the requested value. ACTHZ is actual HZ ("<< 8" is for accuracy) */
-#define ACTHZ (SH_DIV (CLOCK_TICK_RATE, LATCH, 8))
+# define ACTHZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
+#else
+# define ACTHZ (HZ << 8)
+#endif
 
 /* TICK_NSEC is the time between ticks in nsec assuming real ACTHZ */
 #define TICK_NSEC (SH_DIV (1000000UL * 1000, ACTHZ, 8))
-- 
cgit v1.2.3


From 02ab20ae38337b99b5c29c81090f594b8fd61283 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 27 Jul 2012 14:48:10 -0400
Subject: time/jiffies: Rename ACTHZ to SHIFTED_HZ

Ingo noted that ACTHZ is a confusing name, and requested it
be renamed, so this patch renames ACTHZ to SHIFTED_HZ to
better describe it.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Link: http://lkml.kernel.org/r/1343414893-45779-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jiffies.h | 21 +++++++++++++--------
 include/linux/timex.h   |  2 +-
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 7d24466fb80b..82680541576d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -55,21 +55,26 @@
 /* LATCH is used in the interval timer and ftape setup. */
 # define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
 
-/* HZ is the requested value. ACTHZ is actual HZ ("<< 8" is for accuracy) */
-# define ACTHZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
+/*
+ * HZ is the requested value. However the CLOCK_TICK_RATE may not allow
+ * for exactly HZ. So SHIFTED_HZ is high res HZ ("<< 8" is for accuracy)
+ */
+# define SHIFTED_HZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
 #else
-# define ACTHZ (HZ << 8)
+# define SHIFTED_HZ (HZ << 8)
 #endif
 
-/* TICK_NSEC is the time between ticks in nsec assuming real ACTHZ */
-#define TICK_NSEC (SH_DIV (1000000UL * 1000, ACTHZ, 8))
+/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
+#define TICK_NSEC (SH_DIV(1000000UL * 1000, SHIFTED_HZ, 8))
 
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
-/* TICK_USEC_TO_NSEC is the time between ticks in nsec assuming real ACTHZ and	*/
-/* a value TUSEC for TICK_USEC (can be set bij adjtimex)		*/
-#define TICK_USEC_TO_NSEC(TUSEC) (SH_DIV (TUSEC * USER_HZ * 1000, ACTHZ, 8))
+/*
+ * TICK_USEC_TO_NSEC is the time between ticks in nsec assuming SHIFTED_HZ and
+ * a value TUSEC for TICK_USEC (can be set bij adjtimex)
+ */
+#define TICK_USEC_TO_NSEC(TUSEC) (SH_DIV(TUSEC * USER_HZ * 1000, SHIFTED_HZ, 8))
 
 /* some arch's have a small-data section that can be accessed register-relative
  * but that can only take up to, say, 4-byte variables. jiffies being part of
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 99bc88b1fc02..7c5ceb20e03a 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -232,7 +232,7 @@ struct timex {
  * estimated error = NTP dispersion.
  */
 extern unsigned long tick_usec;		/* USER_HZ period (usec) */
-extern unsigned long tick_nsec;		/* ACTHZ          period (nsec) */
+extern unsigned long tick_nsec;		/* SHIFTED_HZ period (nsec) */
 
 extern void ntp_init(void);
 extern void ntp_clear(void);
-- 
cgit v1.2.3


From 44de9d0cad41f2c51ef26916842be046b582dcc9 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Tue, 31 Jul 2012 16:41:49 -0700
Subject: mm: account the total_vm in the vm_stat_account()

vm_stat_account() accounts the shared_vm, stack_vm and reserved_vm now.
But we can also account for total_vm in the vm_stat_account() which makes
the code tidy.

Even for mprotect_fixup(), we can get the right result in the end.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9f279cf5b1b..3955bedeeed1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1528,6 +1528,7 @@ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 static inline void vm_stat_account(struct mm_struct *mm,
 			unsigned long flags, struct file *file, long pages)
 {
+	mm->total_vm += pages;
 }
 #endif /* CONFIG_PROC_FS */
 
-- 
cgit v1.2.3


From 3965c9ae47d64aadf6f13b6fcd37767b83c0689a Mon Sep 17 00:00:00 2001
From: Wanpeng Li <liwp@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:41:52 -0700
Subject: mm: prepare for removal of obsolete /proc/sys/vm/nr_pdflush_threads

Since per-BDI flusher threads were introduced in 2.6, the pdflush
mechanism is not used any more.  But the old interface exported through
/proc/sys/vm/nr_pdflush_threads still exists and is obviously useless.

For back-compatibility, printk warning information and return 2 to notify
the users that the interface is removed.

Signed-off-by: Wanpeng Li <liwp@linux.vnet.ibm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h | 3 +++
 include/linux/writeback.h   | 5 -----
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 489de625cd25..c97c6b9cd38e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -17,6 +17,7 @@
 #include <linux/timer.h>
 #include <linux/writeback.h>
 #include <linux/atomic.h>
+#include <linux/sysctl.h>
 
 struct page;
 struct device;
@@ -304,6 +305,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
 void set_bdi_congested(struct backing_dev_info *bdi, int sync);
 long congestion_wait(int sync, long timeout);
 long wait_iff_congested(struct zone *zone, int sync, long timeout);
+int pdflush_proc_obsolete(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
 {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 6d0a0fcd80e7..c66fe3332d83 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -189,9 +189,4 @@ void tag_pages_for_writeback(struct address_space *mapping,
 
 void account_page_redirty(struct page *page);
 
-/* pdflush.c */
-extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
-				   read-only. */
-
-
 #endif		/* WRITEBACK_H */
-- 
cgit v1.2.3


From 972dc4de13f667a7df27ee32573b2e6fc6cc8434 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:00 -0700
Subject: hugetlb: add an inline helper for finding hstate index

Add an inline helper and use it in the code.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d5d6bbe2259e..217f52859fa7 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -302,6 +302,11 @@ static inline unsigned hstate_index_to_shift(unsigned index)
 	return hstates[index].order + PAGE_SHIFT;
 }
 
+static inline int hstate_index(struct hstate *h)
+{
+	return h - hstates;
+}
+
 #else
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
@@ -320,6 +325,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
 	return 1;
 }
 #define hstate_index_to_shift(index) 0
+#define hstate_index(h) 0
 #endif
 
 #endif /* _LINUX_HUGETLB_H */
-- 
cgit v1.2.3


From 24669e58477e2752c1fbca9c1c988e9dd0d79d15 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:03 -0700
Subject: hugetlb: use mmu_gather instead of a temporary linked list for
 accumulating pages

Use a mmu_gather instead of a temporary linked list for accumulating pages
when we unmap a hugepage range

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 217f52859fa7..0f23c1840c9b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -7,6 +7,7 @@
 
 struct ctl_table;
 struct user_struct;
+struct mmu_gather;
 
 #ifdef CONFIG_HUGETLB_PAGE
 
@@ -40,9 +41,10 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			struct page **, struct vm_area_struct **,
 			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
-			unsigned long, unsigned long, struct page *);
-void __unmap_hugepage_range(struct vm_area_struct *,
-			unsigned long, unsigned long, struct page *);
+			  unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+				unsigned long start, unsigned long end,
+				struct page *ref_page);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
@@ -98,7 +100,6 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end, page)	BUG()
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
@@ -112,13 +113,24 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address)	0
-#define dequeue_hwpoisoned_huge_page(page)	0
+static inline int dequeue_hwpoisoned_huge_page(struct page *page)
+{
+	return 0;
+}
+
 static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
 #define hugetlb_change_protection(vma, address, end, newprot)
 
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
+			struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct page *ref_page)
+{
+	BUG();
+}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 
 #define HUGETLB_ANON_FILE "anon_hugepage"
-- 
cgit v1.2.3


From 189ebff2894a9d0f4e250dd1e154d282ef0a6779 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:06 -0700
Subject: hugetlb: simplify migrate_huge_page()

Since we migrate only one hugepage, don't use linked list for passing the
page around.  Directly pass the page that need to be migrated as argument.
This also removes the usage of page->lru in the migrate path.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 855c337b20c3..ce7e6671968b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -15,7 +15,7 @@ extern int migrate_page(struct address_space *,
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
-extern int migrate_huge_pages(struct list_head *l, new_page_t x,
+extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
 
@@ -36,7 +36,7 @@ static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
-static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
+static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
 
-- 
cgit v1.2.3


From 0edaecfab218d747d30de4575e911907371e2cd2 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:07 -0700
Subject: hugetlb: add a list for tracking in-use HugeTLB pages

hugepage_activelist will be used to track currently used HugeTLB pages.
We need to find the in-use HugeTLB pages to support HugeTLB cgroup removal.
On cgroup removal we update the page's HugeTLB cgroup to point to parent
cgroup.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0f23c1840c9b..ed550d819044 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -211,6 +211,7 @@ struct hstate {
 	unsigned long resv_huge_pages;
 	unsigned long surplus_huge_pages;
 	unsigned long nr_overcommit_huge_pages;
+	struct list_head hugepage_activelist;
 	struct list_head hugepage_freelists[MAX_NUMNODES];
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
-- 
cgit v1.2.3


From c3f38a38715e9b4e7b1dda6840a1375f6894744d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:10 -0700
Subject: hugetlb: make some static variables global

We will use them later in hugetlb_cgroup.c

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ed550d819044..3d677dd41898 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -21,6 +21,11 @@ struct hugepage_subpool {
 	long max_hpages, used_hpages;
 };
 
+extern spinlock_t hugetlb_lock;
+extern int hugetlb_max_hstate __read_mostly;
+#define for_each_hstate(h) \
+	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)
+
 struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
-- 
cgit v1.2.3


From 2bc64a2046975410505bb119bba32705892b9255 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:12 -0700
Subject: mm/hugetlb: add new HugeTLB cgroup

Implement a new controller that allows us to control HugeTLB allocations.
The extension allows to limit the HugeTLB usage per control group and
enforces the controller limit during page fault.  Since HugeTLB doesn't
support page reclaim, enforcing the limit at page fault time implies that,
the application will get SIGBUS signal if it tries to access HugeTLB pages
beyond its limit.  This requires the application to know beforehand how
much HugeTLB pages it would require for its use.

The charge/uncharge calls will be added to HugeTLB code in later patch.
Support for cgroup removal will be added in later patches.

[akpm@linux-foundation.org: s/CONFIG_CGROUP_HUGETLB_RES_CTLR/CONFIG_MEMCG_HUGETLB/g]
[akpm@linux-foundation.org: s/CONFIG_MEMCG_HUGETLB/CONFIG_CGROUP_HUGETLB/g]
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup_subsys.h  |  6 ++++++
 include/linux/hugetlb_cgroup.h | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 include/linux/hugetlb_cgroup.h

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390ce98b2..5b41ce079024 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -72,3 +72,9 @@ SUBSYS(net_prio)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_HUGETLB
+SUBSYS(hugetlb)
+#endif
+
+/* */
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
new file mode 100644
index 000000000000..f19889e56b47
--- /dev/null
+++ b/include/linux/hugetlb_cgroup.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#ifndef _LINUX_HUGETLB_CGROUP_H
+#define _LINUX_HUGETLB_CGROUP_H
+
+#include <linux/res_counter.h>
+
+struct hugetlb_cgroup;
+
+#ifdef CONFIG_CGROUP_HUGETLB
+static inline bool hugetlb_cgroup_disabled(void)
+{
+	if (hugetlb_subsys.disabled)
+		return true;
+	return false;
+}
+
+#else
+static inline bool hugetlb_cgroup_disabled(void)
+{
+	return true;
+}
+
+#endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */
+#endif
-- 
cgit v1.2.3


From 9dd540e23111d8884773ab942a736f3aba4040d4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:15 -0700
Subject: hugetlb/cgroup: add the cgroup pointer to page lru

Add the hugetlb cgroup pointer to 3rd page lru.next.  This limit the usage
to hugetlb cgroup to only hugepages with 3 or more normal pages.  I guess
that is an acceptable limitation.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index f19889e56b47..e5451a3b4ebc 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -18,8 +18,34 @@
 #include <linux/res_counter.h>
 
 struct hugetlb_cgroup;
+/*
+ * Minimum page order trackable by hugetlb cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ */
+#define HUGETLB_CGROUP_MIN_ORDER	2
 
 #ifdef CONFIG_CGROUP_HUGETLB
+
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+{
+	VM_BUG_ON(!PageHuge(page));
+
+	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+		return NULL;
+	return (struct hugetlb_cgroup *)page[2].lru.next;
+}
+
+static inline
+int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+{
+	VM_BUG_ON(!PageHuge(page));
+
+	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+		return -1;
+	page[2].lru.next = (void *)h_cg;
+	return 0;
+}
+
 static inline bool hugetlb_cgroup_disabled(void)
 {
 	if (hugetlb_subsys.disabled)
@@ -28,6 +54,17 @@ static inline bool hugetlb_cgroup_disabled(void)
 }
 
 #else
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+{
+	return NULL;
+}
+
+static inline
+int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+{
+	return 0;
+}
+
 static inline bool hugetlb_cgroup_disabled(void)
 {
 	return true;
-- 
cgit v1.2.3


From 6d76dcf40405144a448040a350fd214ddc243d5e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:18 -0700
Subject: hugetlb/cgroup: add charge/uncharge routines for hugetlb cgroup

Add the charge and uncharge routines for hugetlb cgroup.  We do cgroup
charging in page alloc and uncharge in compound page destructor.
Assigning page's hugetlb cgroup is protected by hugetlb_lock.

[liwp@linux.vnet.ibm.com: add huge_page_order check to avoid incorrect uncharge]
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Wanpeng Li <liwp.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index e5451a3b4ebc..7d3fde996be3 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -53,6 +53,16 @@ static inline bool hugetlb_cgroup_disabled(void)
 	return false;
 }
 
+extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+					struct hugetlb_cgroup **ptr);
+extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+					 struct hugetlb_cgroup *h_cg,
+					 struct page *page);
+extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+					 struct page *page);
+extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+					   struct hugetlb_cgroup *h_cg);
+
 #else
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 {
@@ -70,5 +80,33 @@ static inline bool hugetlb_cgroup_disabled(void)
 	return true;
 }
 
+static inline int
+hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+			     struct hugetlb_cgroup **ptr)
+{
+	return 0;
+}
+
+static inline void
+hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+			     struct hugetlb_cgroup *h_cg,
+			     struct page *page)
+{
+	return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
+{
+	return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+			       struct hugetlb_cgroup *h_cg)
+{
+	return;
+}
+
 #endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */
 #endif
-- 
cgit v1.2.3


From abb8206cb07734d0b7bf033c715995d6371a94c3 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:24 -0700
Subject: hugetlb/cgroup: add hugetlb cgroup control files

Add the control files for hugetlb controller

[akpm@linux-foundation.org: s/CONFIG_CGROUP_HUGETLB_RES_CTLR/CONFIG_MEMCG_HUGETLB/g]
[akpm@linux-foundation.org: s/CONFIG_MEMCG_HUGETLB/CONFIG_CGROUP_HUGETLB/]
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h        | 5 +++++
 include/linux/hugetlb_cgroup.h | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3d677dd41898..f9db20bfa9fc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -4,6 +4,7 @@
 #include <linux/mm_types.h>
 #include <linux/fs.h>
 #include <linux/hugetlb_inline.h>
+#include <linux/cgroup.h>
 
 struct ctl_table;
 struct user_struct;
@@ -221,6 +222,10 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+#ifdef CONFIG_CGROUP_HUGETLB
+	/* cgroup control files */
+	struct cftype cgroup_files[5];
+#endif
 	char name[HSTATE_NAME_LEN];
 };
 
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 7d3fde996be3..73f1e600fc12 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,6 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
+extern int hugetlb_cgroup_file_init(int idx) __init;
 
 #else
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
@@ -108,5 +109,10 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
+static inline int __init hugetlb_cgroup_file_init(int idx)
+{
+	return 0;
+}
+
 #endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */
 #endif
-- 
cgit v1.2.3


From 8e6ac7fab374816de9a8b0a8fbb02ef761a30ff4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:27 -0700
Subject: hugetlb/cgroup: migrate hugetlb cgroup info from oldpage to new page
 during migration

With HugeTLB pages, hugetlb cgroup is uncharged in compound page
destructor.  Since we are holding a hugepage reference, we can be sure
that old page won't get uncharged till the last put_page().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 73f1e600fc12..d73878c694b3 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -63,6 +63,8 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
 extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_migrate(struct page *oldhpage,
+				   struct page *newhpage);
 
 #else
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
@@ -114,5 +116,11 @@ static inline int __init hugetlb_cgroup_file_init(int idx)
 	return 0;
 }
 
+static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
+					  struct page *newhpage)
+{
+	return;
+}
+
 #endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */
 #endif
-- 
cgit v1.2.3


From c59e26104e3e0e952cd7d63e79cd71ee5a9ec25a Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:42:49 -0700
Subject: mm/compaction: cleanup on compaction_deferred

When CONFIG_COMPACTION is enabled, compaction_deferred() tries to
recalculate the deferred limit again, which isn't necessary.

When CONFIG_COMPACTION is disabled, compaction_deferred() should return
"true" or "false" since it has "bool" for its return value.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7f2d60..133ddcf83397 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -58,7 +58,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 	if (++zone->compact_considered > defer_limit)
 		zone->compact_considered = defer_limit;
 
-	return zone->compact_considered < (1UL << zone->compact_defer_shift);
+	return zone->compact_considered < defer_limit;
 }
 
 #else
@@ -85,7 +85,7 @@ static inline void defer_compaction(struct zone *zone, int order)
 
 static inline bool compaction_deferred(struct zone *zone, int order)
 {
-	return 1;
+	return true;
 }
 
 #endif /* CONFIG_COMPACTION */
-- 
cgit v1.2.3


From c255a458055e459f65eb7b7f51dc5dbdd0caf1d8 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 31 Jul 2012 16:43:02 -0700
Subject: memcg: rename config variables

Sanity:

CONFIG_CGROUP_MEM_RES_CTLR -> CONFIG_MEMCG
CONFIG_CGROUP_MEM_RES_CTLR_SWAP -> CONFIG_MEMCG_SWAP
CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED -> CONFIG_MEMCG_SWAP_ENABLED
CONFIG_CGROUP_MEM_RES_CTLR_KMEM -> CONFIG_MEMCG_KMEM

[mhocko@suse.cz: fix missed bits]
Cc: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup_subsys.h |  2 +-
 include/linux/memcontrol.h    | 14 +++++++-------
 include/linux/mmzone.h        |  8 ++++----
 include/linux/page_cgroup.h   | 10 +++++-----
 include/linux/sched.h         |  2 +-
 include/linux/swap.h          |  6 +++---
 6 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 5b41ce079024..dfae957398c3 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -31,7 +31,7 @@ SUBSYS(cpuacct)
 
 /* */
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 SUBSYS(mem_cgroup)
 #endif
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83e7ba90d6e5..170076222431 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie {
 	unsigned int generation;
 };
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
  * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
@@ -124,7 +124,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
 #endif
 
@@ -193,7 +193,7 @@ void mem_cgroup_split_huge_fixup(struct page *head);
 bool mem_cgroup_bad_page_check(struct page *page);
 void mem_cgroup_print_bad_page(struct page *page);
 #endif
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
 static inline int mem_cgroup_newpage_charge(struct page *page,
@@ -384,9 +384,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
 				struct page *newpage)
 {
 }
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#endif /* CONFIG_MEMCG */
 
-#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
+#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
 static inline bool
 mem_cgroup_bad_page_check(struct page *page)
 {
@@ -406,7 +406,7 @@ enum {
 };
 
 struct sock;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 #else
@@ -416,6 +416,6 @@ static inline void sock_update_memcg(struct sock *sk)
 static inline void sock_release_memcg(struct sock *sk)
 {
 }
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 458988bd55a1..3bdfa15b2012 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -201,7 +201,7 @@ struct zone_reclaim_stat {
 struct lruvec {
 	struct list_head lists[NR_LRU_LISTS];
 	struct zone_reclaim_stat reclaim_stat;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 	struct zone *zone;
 #endif
 };
@@ -671,7 +671,7 @@ typedef struct pglist_data {
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 	struct page_cgroup *node_page_cgroup;
 #endif
 #endif
@@ -736,7 +736,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);
 
 static inline struct zone *lruvec_zone(struct lruvec *lruvec)
 {
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 	return lruvec->zone;
 #else
 	return container_of(lruvec, struct zone, lruvec);
@@ -1052,7 +1052,7 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 	/*
 	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
 	 * section. (see memcontrol.h/page_cgroup.h about this.)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index a88cdba27809..777a524716db 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -12,7 +12,7 @@ enum {
 #ifndef __GENERATING_BOUNDS_H
 #include <generated/bounds.h>
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 #include <linux/bit_spinlock.h>
 
 /*
@@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_MEMCG */
 struct page_cgroup;
 
 static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
@@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
 
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#endif /* CONFIG_MEMCG */
 
 #include <linux/swap.h>
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
@@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }
 
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+#endif /* CONFIG_MEMCG_SWAP */
 
 #endif /* !__GENERATING_BOUNDS_H */
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68dcffaa62a0..865725adb9d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1584,7 +1584,7 @@ struct task_struct {
 	/* bitmask and counter of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
+#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
 	struct memcg_batch_info {
 		int do_batch;	/* incremented when batch uncharge started */
 		struct mem_cgroup *memcg; /* target memcg of uncharge */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c84ec68eaec9..9a16bb1cefd1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -301,7 +301,7 @@ static inline void scan_unevictable_unregister_node(struct node *node)
 
 extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
@@ -309,7 +309,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 	return vm_swappiness;
 }
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
 #else
 static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
@@ -360,7 +360,7 @@ extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
 extern void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
 #else
-- 
cgit v1.2.3


From ca28ddc908fcfef0e5c1b6e5df632db7fc26de10 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <liwp@linux.vnet.ibm.com>
Date: Tue, 31 Jul 2012 16:43:04 -0700
Subject: mm: remove unused LRU_ALL_EVICTABLE

Signed-off-by: Wanpeng Li <liwp.linux@gmail.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3bdfa15b2012..1495d952e641 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -209,7 +209,6 @@ struct lruvec {
 /* Mask used at gathering information at once (see memcontrol.c) */
 #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
-#define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON)
 #define LRU_ALL	     ((1 << NR_LRU_LISTS) - 1)
 
 /* Isolate clean file */
-- 
cgit v1.2.3


From 7db8889ab05b57200158432755af318fb68854a2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 31 Jul 2012 16:43:12 -0700
Subject: mm: have order > 0 compaction start off where it left

Order > 0 compaction stops when enough free pages of the correct page
order have been coalesced.  When doing subsequent higher order
allocations, it is possible for compaction to be invoked many times.

However, the compaction code always starts out looking for things to
compact at the start of the zone, and for free pages to compact things to
at the end of the zone.

This can cause quadratic behaviour, with isolate_freepages starting at the
end of the zone each time, even though previous invocations of the
compaction code already filled up all free memory on that end of the zone.

This can cause isolate_freepages to take enormous amounts of CPU with
certain workloads on larger memory systems.

The obvious solution is to have isolate_freepages remember where it left
off last time, and continue at that point the next time it gets invoked
for an order > 0 compaction.  This could cause compaction to fail if
cc->free_pfn and cc->migrate_pfn are close together initially, in that
case we restart from the end of the zone and try once more.

Forced full (order == -1) compactions are left alone.

[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: s/laste/last/, use 80 cols]
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Jim Schutt <jaschut@sandia.gov>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Cc: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1495d952e641..1aeadce4d56e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -368,6 +368,10 @@ struct zone {
 	 */
 	spinlock_t		lock;
 	int                     all_unreclaimable; /* All pages pinned */
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* pfn where the last incremental compaction isolated free pages */
+	unsigned long		compact_cached_free_pfn;
+#endif
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
-- 
cgit v1.2.3


From fe03025db3f4ade1f231b174938e0fe224722759 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Tue, 31 Jul 2012 16:43:14 -0700
Subject: mm: CONFIG_HAVE_MEMBLOCK_NODE -> CONFIG_HAVE_MEMBLOCK_NODE_MAP

0ee332c14518699 ("memblock: Kill early_node_map[]") wanted to replace
CONFIG_ARCH_POPULATES_NODE_MAP with CONFIG_HAVE_MEMBLOCK_NODE_MAP but
ended up replacing one occurence with a reference to the non-existent
symbol CONFIG_HAVE_MEMBLOCK_NODE.

The resulting omission of code would probably have been causing problems
to 32-bit machines with memory hotplug.

Signed-off-by: Rabin Vincent <rabin@rab.in>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1aeadce4d56e..f64afa5929fe 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -776,7 +776,7 @@ extern int movable_zone;
 
 static inline int zone_movable_is_highmem(void)
 {
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE)
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 	return movable_zone == ZONE_HIGHMEM;
 #else
 	return 0;
-- 
cgit v1.2.3


From 8e125cd85517c9716695b0abfabc0a4a3fcb94f3 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 31 Jul 2012 16:43:16 -0700
Subject: vmscan: remove obsolete shrink_control comment

09f363c7 ("vmscan: fix shrinker callback bug in fs/super.c") fixed a
shrinker callback which was returning -1 when nr_to_scan is zero, which
caused excessive slab scanning.  But 635697c6 ("vmscan: fix initial
shrinker size handling") fixed the problem, again so we can freely return
-1 although nr_to_scan is zero.  So let's revert 09f363c7 because the
comment added in 09f363c7 made an unnecessary rule.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 07ceb97d53fa..ac6b8ee07825 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -20,7 +20,6 @@ struct shrink_control {
  * 'nr_to_scan' entries and attempt to free them up.  It should return
  * the number of objects which remain in the cache.  If it returns -1, it means
  * it cannot do any scanning at this time (eg. there is a risk of deadlock).
- * The callback must not return -1 if nr_to_scan is zero.
  *
  * The 'gfpmask' refers to the allocation we are currently trying to
  * fulfil.
-- 
cgit v1.2.3


From 9adb62a5df9c0fbef7b4665919329f73a34651ed Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@huawei.com>
Date: Tue, 31 Jul 2012 16:43:28 -0700
Subject: mm/hotplug: correctly setup fallback zonelists when creating new
 pgdat

When hotadd_new_pgdat() is called to create new pgdat for a new node, a
fallback zonelist should be created for the new node.  There's code to try
to achieve that in hotadd_new_pgdat() as below:

	/*
	 * The node we allocated has no zone fallback lists. For avoiding
	 * to access not-initialized zonelist, build here.
	 */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(pgdat, NULL);
	mutex_unlock(&zonelists_mutex);

But it doesn't work as expected.  When hotadd_new_pgdat() is called, the
new node is still in offline state because node_set_online(nid) hasn't
been called yet.  And build_all_zonelists() only builds zonelists for
online nodes as:

        for_each_online_node(nid) {
                pg_data_t *pgdat = NODE_DATA(nid);

                build_zonelists(pgdat);
                build_zonelist_cache(pgdat);
        }

Though we hope to create zonelist for the new pgdat, but it doesn't.  So
add a new parameter "pgdat" the build_all_zonelists() to build pgdat for
the new pgdat too.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Keping Chen <chenkeping@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f64afa5929fe..98f079bcf399 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -721,7 +721,7 @@ typedef struct pglist_data {
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;
-void build_all_zonelists(void *data);
+void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
-- 
cgit v1.2.3


From 340175b7d14d5617559d0c1a54fa0ea204d9edcd Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@huawei.com>
Date: Tue, 31 Jul 2012 16:43:32 -0700
Subject: mm/hotplug: free zone->pageset when a zone becomes empty

When a zone becomes empty after memory offlining, free zone->pageset.
Otherwise it will cause memory leak when adding memory to the empty zone
again because build_all_zonelists() will allocate zone->pageset for an
empty zone.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Wei Wang <Bessel.Wang@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Keping Chen <chenkeping@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3955bedeeed1..7c6dfd2faa69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1331,6 +1331,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
 extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_reset(struct zone *zone);
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
-- 
cgit v1.2.3


From 62ce1c706f817cb9defef3ac2dfdd815149f2968 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 31 Jul 2012 16:43:39 -0700
Subject: mm, oom: move declaration for mem_cgroup_out_of_memory to oom.h

mem_cgroup_out_of_memory() is defined in mm/oom_kill.c, so declare it in
linux/oom.h rather than linux/memcontrol.h.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 --
 include/linux/oom.h        | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 170076222431..c0bff8976a69 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -72,8 +72,6 @@ extern void mem_cgroup_uncharge_end(void);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
-				     int order);
 bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
 				  struct mem_cgroup *memcg);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index e4c29bc72e70..eb9dc14362c5 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -49,6 +49,8 @@ extern unsigned long oom_badness(struct task_struct *p,
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
+extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+				     int order);
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
 extern int register_oom_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 9cbb78bb314360a860a8b23723971cb6fcb54176 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 31 Jul 2012 16:43:44 -0700
Subject: mm, memcg: introduce own oom handler to iterate only over its own
 threads

The global oom killer is serialized by the per-zonelist
try_set_zonelist_oom() which is used in the page allocator.  Concurrent
oom kills are thus a rare event and only occur in systems using
mempolicies and with a large number of nodes.

Memory controller oom kills, however, can frequently be concurrent since
there is no serialization once the oom killer is called for oom conditions
in several different memcgs in parallel.

This creates a massive contention on tasklist_lock since the oom killer
requires the readside for the tasklist iteration.  If several memcgs are
calling the oom killer, this lock can be held for a substantial amount of
time, especially if threads continue to enter it as other threads are
exiting.

Since the exit path grabs the writeside of the lock with irqs disabled in
a few different places, this can cause a soft lockup on cpus as a result
of tasklist_lock starvation.

The kernel lacks unfair writelocks, and successful calls to the oom killer
usually result in at least one thread entering the exit path, so an
alternative solution is needed.

This patch introduces a seperate oom handler for memcgs so that they do
not require tasklist_lock for as much time.  Instead, it iterates only
over the threads attached to the oom memcg and grabs a reference to the
selected thread before calling oom_kill_process() to ensure it doesn't
prematurely exit.

This still requires tasklist_lock for the tasklist dump, iterating
children of the selected process, and killing all other threads on the
system sharing the same memory as the selected victim.  So while this
isn't a complete solution to tasklist_lock starvation, it significantly
reduces the amount of time that it is held.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Sha Zhengju <handai.szj@taobao.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  9 ++-------
 include/linux/oom.h        | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c0bff8976a69..2a80544aec99 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,7 +180,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
+extern void __mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+				       int order);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -364,12 +365,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 	return 0;
 }
 
-static inline
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
-{
-	return 0;
-}
-
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
index eb9dc14362c5..5dc0e384ae9e 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,17 +40,33 @@ enum oom_constraint {
 	CONSTRAINT_MEMCG,
 };
 
+enum oom_scan_t {
+	OOM_SCAN_OK,		/* scan thread and find its badness */
+	OOM_SCAN_CONTINUE,	/* do not consider thread for oom kill */
+	OOM_SCAN_ABORT,		/* abort the iteration and return */
+	OOM_SCAN_SELECT,	/* always select this thread first */
+};
+
 extern void compare_swap_oom_score_adj(int old_val, int new_val);
 extern int test_set_oom_score_adj(int new_val);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
+extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+			     unsigned int points, unsigned long totalpages,
+			     struct mem_cgroup *memcg, nodemask_t *nodemask,
+			     const char *message);
+
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
+extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
+		unsigned long totalpages, const nodemask_t *nodemask,
+		bool force_kill);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order);
+
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
 extern int register_oom_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 876aafbfd9ba5bb352f1b14622c27f3fe9a99013 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 31 Jul 2012 16:43:48 -0700
Subject: mm, memcg: move all oom handling to memcontrol.c

By globally defining check_panic_on_oom(), the memcg oom handler can be
moved entirely to mm/memcontrol.c.  This removes the ugly #ifdef in the
oom killer and cleans up the code.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 --
 include/linux/oom.h        | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2a80544aec99..5a3ee6423634 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,8 +180,6 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
-extern void __mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
-				       int order);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5dc0e384ae9e..49a3031fda50 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -61,6 +61,9 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
+extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
+			       int order, const nodemask_t *nodemask);
+
 extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 		unsigned long totalpages, const nodemask_t *nodemask,
 		bool force_kill);
-- 
cgit v1.2.3


From ee6f509c3274014d1f52e7a7a10aee9f85393c5e Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 31 Jul 2012 16:43:50 -0700
Subject: mm: factor out memory isolate functions

mm/page_alloc.c has some memory isolation functions but they are used only
when we enable CONFIG_{CMA|MEMORY_HOTPLUG|MEMORY_FAILURE}.  So let's make
it configurable by new CONFIG_MEMORY_ISOLATION so that it can reduce
binary size and we can check it simple by CONFIG_MEMORY_ISOLATION, not if
defined CONFIG_{CMA|MEMORY_HOTPLUG|MEMORY_FAILURE}.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-isolation.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3bdcab30ca41..105077aa7685 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -1,6 +1,11 @@
 #ifndef __LINUX_PAGEISOLATION_H
 #define __LINUX_PAGEISOLATION_H
 
+
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
+void set_pageblock_migratetype(struct page *page, int migratetype);
+int move_freepages_block(struct zone *zone, struct page *page,
+				int migratetype);
 /*
  * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
  * If specified range includes migrate types other than MOVABLE or CMA,
@@ -10,7 +15,7 @@
  * free all pages in the range. test_page_isolated() can be used for
  * test it.
  */
-extern int
+int
 start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			 unsigned migratetype);
 
@@ -18,7 +23,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
  * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
  * target range is [start_pfn, end_pfn)
  */
-extern int
+int
 undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			unsigned migratetype);
 
@@ -30,8 +35,8 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
 /*
  * Internal functions. Changes pageblock's migrate type.
  */
-extern int set_migratetype_isolate(struct page *page);
-extern void unset_migratetype_isolate(struct page *page, unsigned migratetype);
+int set_migratetype_isolate(struct page *page);
+void unset_migratetype_isolate(struct page *page, unsigned migratetype);
 
 
 #endif
-- 
cgit v1.2.3


From 702d1a6e0766d45642c934444fd41f658d251305 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 31 Jul 2012 16:43:56 -0700
Subject: memory-hotplug: fix kswapd looping forever problem

When hotplug offlining happens on zone A, it starts to mark freed page as
MIGRATE_ISOLATE type in buddy for preventing further allocation.
(MIGRATE_ISOLATE is very irony type because it's apparently on buddy but
we can't allocate them).

When the memory shortage happens during hotplug offlining, current task
starts to reclaim, then wake up kswapd.  Kswapd checks watermark, then go
sleep because current zone_watermark_ok_safe doesn't consider
MIGRATE_ISOLATE freed page count.  Current task continue to reclaim in
direct reclaim path without kswapd's helping.  The problem is that
zone->all_unreclaimable is set by only kswapd so that current task would
be looping forever like below.

__alloc_pages_slowpath
restart:
	wake_all_kswapd
rebalance:
	__alloc_pages_direct_reclaim
		do_try_to_free_pages
			if global_reclaim && !all_unreclaimable
				return 1; /* It means we did did_some_progress */
	skip __alloc_pages_may_oom
	should_alloc_retry
		goto rebalance;

If we apply KOSAKI's patch[1] which doesn't depends on kswapd about
setting zone->all_unreclaimable, we can solve this problem by killing some
task in direct reclaim path.  But it doesn't wake up kswapd, still.  It
could be a problem still if other subsystem needs GFP_ATOMIC request.  So
kswapd should consider MIGRATE_ISOLATE when it calculate free pages BEFORE
going sleep.

This patch counts the number of MIGRATE_ISOLATE page block and
zone_watermark_ok_safe will consider it if the system has such blocks
(fortunately, it's very rare so no problem in POV overhead and kswapd is
never hotpath).

Copy/modify from Mel's quote
"
Ideal solution would be "allocating" the pageblock.
It would keep the free space accounting as it is but historically,
memory hotplug didn't allocate pages because it would be difficult to
detect if a pageblock was isolated or if part of some balloon.
Allocating just full pageblocks would work around this, However,
it would play very badly with CMA.
"

[1] http://lkml.org/lkml/2012/6/14/74

[akpm@linux-foundation.org: simplify nr_zone_isolate_freepages(), rework zone_watermark_ok_safe() comment, simplify set_pageblock_isolate() and restore_pageblock_isolate()]
[akpm@linux-foundation.org: fix CONFIG_MEMORY_ISOLATION=n build]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 98f079bcf399..64b2c3a48286 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -478,6 +478,14 @@ struct zone {
 	 * rarely used fields:
 	 */
 	const char		*name;
+#ifdef CONFIG_MEMORY_ISOLATION
+	/*
+	 * the number of MIGRATE_ISOLATE *pageblock*.
+	 * We need this for free page counting. Look at zone_watermark_ok_safe.
+	 * It's protected by zone->lock
+	 */
+	int		nr_pageblock_isolate;
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
-- 
cgit v1.2.3


From 072bb0aa5e062902968c5c1007bba332c7820cf4 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:43:58 -0700
Subject: mm: sl[au]b: add knowledge of PFMEMALLOC reserve pages

When a user or administrator requires swap for their application, they
create a swap partition and file, format it with mkswap and activate it
with swapon.  Swap over the network is considered as an option in diskless
systems.  The two likely scenarios are when blade servers are used as part
of a cluster where the form factor or maintenance costs do not allow the
use of disks and thin clients.

The Linux Terminal Server Project recommends the use of the Network Block
Device (NBD) for swap according to the manual at
https://sourceforge.net/projects/ltsp/files/Docs-Admin-Guide/LTSPManual.pdf/download
There is also documentation and tutorials on how to setup swap over NBD at
places like https://help.ubuntu.com/community/UbuntuLTSP/EnableNBDSWAP The
nbd-client also documents the use of NBD as swap.  Despite this, the fact
is that a machine using NBD for swap can deadlock within minutes if swap
is used intensively.  This patch series addresses the problem.

The core issue is that network block devices do not use mempools like
normal block devices do.  As the host cannot control where they receive
packets from, they cannot reliably work out in advance how much memory
they might need.  Some years ago, Peter Zijlstra developed a series of
patches that supported swap over an NFS that at least one distribution is
carrying within their kernels.  This patch series borrows very heavily
from Peter's work to support swapping over NBD as a pre-requisite to
supporting swap-over-NFS.  The bulk of the complexity is concerned with
preserving memory that is allocated from the PFMEMALLOC reserves for use
by the network layer which is needed for both NBD and NFS.

Patch 1 adds knowledge of the PFMEMALLOC reserves to SLAB and SLUB to
	preserve access to pages allocated under low memory situations
	to callers that are freeing memory.

Patch 2 optimises the SLUB fast path to avoid pfmemalloc checks

Patch 3 introduces __GFP_MEMALLOC to allow access to the PFMEMALLOC
	reserves without setting PFMEMALLOC.

Patch 4 opens the possibility for softirqs to use PFMEMALLOC reserves
	for later use by network packet processing.

Patch 5 only sets page->pfmemalloc when ALLOC_NO_WATERMARKS was required

Patch 6 ignores memory policies when ALLOC_NO_WATERMARKS is set.

Patches 7-12 allows network processing to use PFMEMALLOC reserves when
	the socket has been marked as being used by the VM to clean pages. If
	packets are received and stored in pages that were allocated under
	low-memory situations and are unrelated to the VM, the packets
	are dropped.

	Patch 11 reintroduces __skb_alloc_page which the networking
	folk may object to but is needed in some cases to propogate
	pfmemalloc from a newly allocated page to an skb. If there is a
	strong objection, this patch can be dropped with the impact being
	that swap-over-network will be slower in some cases but it should
	not fail.

Patch 13 is a micro-optimisation to avoid a function call in the
	common case.

Patch 14 tags NBD sockets as being SOCK_MEMALLOC so they can use
	PFMEMALLOC if necessary.

Patch 15 notes that it is still possible for the PFMEMALLOC reserve
	to be depleted. To prevent this, direct reclaimers get throttled on
	a waitqueue if 50% of the PFMEMALLOC reserves are depleted.  It is
	expected that kswapd and the direct reclaimers already running
	will clean enough pages for the low watermark to be reached and
	the throttled processes are woken up.

Patch 16 adds a statistic to track how often processes get throttled

Some basic performance testing was run using kernel builds, netperf on
loopback for UDP and TCP, hackbench (pipes and sockets), iozone and
sysbench.  Each of them were expected to use the sl*b allocators
reasonably heavily but there did not appear to be significant performance
variances.

For testing swap-over-NBD, a machine was booted with 2G of RAM with a
swapfile backed by NBD.  8*NUM_CPU processes were started that create
anonymous memory mappings and read them linearly in a loop.  The total
size of the mappings were 4*PHYSICAL_MEMORY to use swap heavily under
memory pressure.

Without the patches and using SLUB, the machine locks up within minutes
and runs to completion with them applied.  With SLAB, the story is
different as an unpatched kernel run to completion.  However, the patched
kernel completed the test 45% faster.

MICRO
                                         3.5.0-rc2 3.5.0-rc2
					 vanilla     swapnbd
Unrecognised test vmscan-anon-mmap-write
MMTests Statistics: duration
Sys Time Running Test (seconds)             197.80    173.07
User+Sys Time Running Test (seconds)        206.96    182.03
Total Elapsed Time (seconds)               3240.70   1762.09

This patch: mm: sl[au]b: add knowledge of PFMEMALLOC reserve pages

Allocations of pages below the min watermark run a risk of the machine
hanging due to a lack of memory.  To prevent this, only callers who have
PF_MEMALLOC or TIF_MEMDIE set and are not processing an interrupt are
allowed to allocate with ALLOC_NO_WATERMARKS.  Once they are allocated to
a slab though, nothing prevents other callers consuming free objects
within those slabs.  This patch limits access to slab pages that were
alloced from the PFMEMALLOC reserves.

When this patch is applied, pages allocated from below the low watermark
are returned with page->pfmemalloc set and it is up to the caller to
determine how the page should be protected.  SLAB restricts access to any
page with page->pfmemalloc set to callers which are known to able to
access the PFMEMALLOC reserve.  If one is not available, an attempt is
made to allocate a new page rather than use a reserve.  SLUB is a bit more
relaxed in that it only records if the current per-CPU page was allocated
from PFMEMALLOC reserve and uses another partial slab if the caller does
not have the necessary GFP or process flags.  This was found to be
sufficient in tests to avoid hangs due to SLUB generally maintaining
smaller lists than SLAB.

In low-memory conditions it does mean that !PFMEMALLOC allocators can fail
a slab allocation even though free objects are available because they are
being preserved for callers that are freeing pages.

[a.p.zijlstra@chello.nl: Original implementation]
[sebastian@breakpoint.cc: Correct order of page flag clearing]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h   |  9 +++++++++
 include/linux/page-flags.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 074eb98fe15d..375e79eb009b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,15 @@ struct page {
 		union {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* slub/slob first free object */
+			bool pfmemalloc;	/* If set by the page allocator,
+						 * ALLOC_PFMEMALLOC was set
+						 * and the low watermark was not
+						 * met implying that the system
+						 * is under some pressure. The
+						 * caller should try ensure
+						 * this page is only used to
+						 * free other pages.
+						 */
 		};
 
 		union {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9451af..b5d13841604e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <linux/bug.h>
+#include <linux/mmdebug.h>
 #ifndef __GENERATING_BOUNDS_H
 #include <linux/mm_types.h>
 #include <generated/bounds.h>
@@ -453,6 +454,34 @@ static inline int PageTransTail(struct page *page)
 }
 #endif
 
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline int PageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	return PageActive(page);
+}
+
+static inline void SetPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	SetPageActive(page);
+}
+
+static inline void __ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	__ClearPageActive(page);
+}
+
+static inline void ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	ClearPageActive(page);
+}
+
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
-- 
cgit v1.2.3


From b37f1dd0f543d9714f96c2f9b9f74f7bdfdfdf31 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:03 -0700
Subject: mm: introduce __GFP_MEMALLOC to allow access to emergency reserves

__GFP_MEMALLOC will allow the allocation to disregard the watermarks, much
like PF_MEMALLOC.  It allows one to pass along the memalloc state in
object related allocation flags as opposed to task related flags, such as
sk->sk_allocation.  This removes the need for ALLOC_PFMEMALLOC as callers
using __GFP_MEMALLOC can get the ALLOC_NO_WATERMARK flag which is now
enough to identify allocations related to page reclaim.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h      | 10 ++++++++--
 include/linux/mm_types.h |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1e49be49d324..cbd7400e5862 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -23,6 +23,7 @@ struct vm_area_struct;
 #define ___GFP_REPEAT		0x400u
 #define ___GFP_NOFAIL		0x800u
 #define ___GFP_NORETRY		0x1000u
+#define ___GFP_MEMALLOC		0x2000u
 #define ___GFP_COMP		0x4000u
 #define ___GFP_ZERO		0x8000u
 #define ___GFP_NOMEMALLOC	0x10000u
@@ -76,9 +77,14 @@ struct vm_area_struct;
 #define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)	/* See above */
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)	/* See above */
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY) /* See above */
+#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)	/* Add compound page metadata */
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)	/* Return zeroed page on success */
-#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves */
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
+							 * This takes precedence over the
+							 * __GFP_MEMALLOC flag if both are
+							 * set
+							 */
 #define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
@@ -129,7 +135,7 @@ struct vm_area_struct;
 /* Control page allocator reclaim behavior */
 #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
-			__GFP_NORETRY|__GFP_NOMEMALLOC)
+			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
 
 /* Control slab gfp mask during early boot */
 #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 375e79eb009b..bf7867200b95 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -55,7 +55,7 @@ struct page {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* slub/slob first free object */
 			bool pfmemalloc;	/* If set by the page allocator,
-						 * ALLOC_PFMEMALLOC was set
+						 * ALLOC_NO_WATERMARKS was set
 						 * and the low watermark was not
 						 * met implying that the system
 						 * is under some pressure. The
-- 
cgit v1.2.3


From 907aed48f65efeecf91575397e3d79335d93a466 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:07 -0700
Subject: mm: allow PF_MEMALLOC from softirq context

This is needed to allow network softirq packet processing to make use of
PF_MEMALLOC.

Currently softirq context cannot use PF_MEMALLOC due to it not being
associated with a task, and therefore not having task flags to fiddle with
- thus the gfp to alloc flag mapping ignores the task flags when in
interrupts (hard or soft) context.

Allowing softirqs to make use of PF_MEMALLOC therefore requires some
trickery.  This patch borrows the task flags from whatever process happens
to be preempted by the softirq.  It then modifies the gfp to alloc flags
mapping to not exclude task flags in softirq context, and modify the
softirq code to save, clear and restore the PF_MEMALLOC flag.

The save and clear, ensures the preempted task's PF_MEMALLOC flag doesn't
leak into the softirq.  The restore ensures a softirq's PF_MEMALLOC flag
cannot leak back into the preempted process.  This should be safe due to
the following reasons

Softirqs can run on multiple CPUs sure but the same task should not be
	executing the same softirq code. Neither should the softirq
	handler be preempted by any other softirq handler so the flags
	should not leak to an unrelated softirq.

Softirqs re-enable hardware interrupts in __do_softirq() so can be
	preempted by hardware interrupts so PF_MEMALLOC is inherited
	by the hard IRQ. However, this is similar to a process in
	reclaim being preempted by a hardirq. While PF_MEMALLOC is
	set, gfp_to_alloc_flags() distinguishes between hard and
	soft irqs and avoids giving a hardirq the ALLOC_NO_WATERMARKS
	flag.

If the softirq is deferred to ksoftirq then its flags may be used
        instead of a normal tasks but as the softirq cannot be preempted,
        the PF_MEMALLOC flag does not leak to other code by accident.

[davem@davemloft.net: Document why PF_MEMALLOC is safe]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 865725adb9d3..c147e7024f11 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1894,6 +1894,13 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
+static inline void tsk_restore_flags(struct task_struct *task,
+				unsigned long orig_flags, unsigned long flags)
+{
+	task->flags &= ~flags;
+	task->flags |= orig_flags & flags;
+}
+
 #ifdef CONFIG_SMP
 extern void do_set_cpus_allowed(struct task_struct *p,
 			       const struct cpumask *new_mask);
-- 
cgit v1.2.3


From c93bdd0e03e848555d144eb44a1f275b871a8dd5 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:19 -0700
Subject: netvm: allow skb allocation to use PFMEMALLOC reserves

Change the skb allocation API to indicate RX usage and use this to fall
back to the PFMEMALLOC reserve when needed.  SKBs allocated from the
reserve are tagged in skb->pfmemalloc.  If an SKB is allocated from the
reserve and the socket is later found to be unrelated to page reclaim, the
packet is dropped so that the memory remains available for page reclaim.
Network protocols are expected to recover from this packet loss.

[a.p.zijlstra@chello.nl: Ideas taken from various patches]
[davem@davemloft.net: Use static branches, coding style corrections]
[sebastian@breakpoint.cc: Avoid unnecessary cast, fix !CONFIG_NET build]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    |  3 +++
 include/linux/skbuff.h | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cbd7400e5862..4883f393f50a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -385,6 +385,9 @@ void drain_local_pages(void *dummy);
  */
 extern gfp_t gfp_allowed_mask;
 
+/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
+
 extern void pm_restrict_gfp_mask(void);
 extern void pm_restore_gfp_mask(void);
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d205c4be7f5b..0336f02e3667 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -462,6 +462,7 @@ struct sk_buff {
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
+	__u8			pfmemalloc:1;
 	__u8			ooo_okay:1;
 	__u8			l4_rxhash:1;
 	__u8			wifi_acked_valid:1;
@@ -502,6 +503,15 @@ struct sk_buff {
 #include <linux/slab.h>
 
 
+#define SKB_ALLOC_FCLONE	0x01
+#define SKB_ALLOC_RX		0x02
+
+/* Returns true if the skb was allocated from PFMEMALLOC reserves */
+static inline bool skb_pfmemalloc(const struct sk_buff *skb)
+{
+	return unlikely(skb->pfmemalloc);
+}
+
 /*
  * skb might have a dst pointer attached, refcounted or not.
  * _skb_refdst low order bit is set if refcount was _not_ taken
@@ -565,7 +575,7 @@ extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 			     bool *fragstolen, int *delta_truesize);
 
 extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int fclone, int node);
+				   gfp_t priority, int flags, int node);
 extern struct sk_buff *build_skb(void *data, unsigned int frag_size);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
@@ -576,7 +586,7 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-	return __alloc_skb(size, priority, 1, NUMA_NO_NODE);
+	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
 extern void skb_recycle(struct sk_buff *skb);
-- 
cgit v1.2.3


From c48a11c7ad2623b99bbd6859b0b4234e7f11176f Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:23 -0700
Subject: netvm: propagate page->pfmemalloc to skb

The skb->pfmemalloc flag gets set to true iff during the slab allocation
of data in __alloc_skb that the the PFMEMALLOC reserves were used.  If the
packet is fragmented, it is possible that pages will be allocated from the
PFMEMALLOC reserve without propagating this information to the skb.  This
patch propagates page->pfmemalloc from pages allocated for fragments to
the skb.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/skbuff.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0336f02e3667..b814bb8fd5ab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1247,6 +1247,17 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 {
 	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
+	/*
+	 * Propagate page->pfmemalloc to the skb if we can. The problem is
+	 * that not all callers have unique ownership of the page. If
+	 * pfmemalloc is set, we check the mapping as a mapping implies
+	 * page->index is set (index and pfmemalloc share space).
+	 * If it's a valid mapping, we cannot use page->pfmemalloc but we
+	 * do not lose pfmemalloc information as the pages would not be
+	 * allocated using __GFP_MEMALLOC.
+	 */
+	if (page->pfmemalloc && !page->mapping)
+		skb->pfmemalloc	= true;
 	frag->page.p		  = page;
 	frag->page_offset	  = off;
 	skb_frag_size_set(frag, size);
-- 
cgit v1.2.3


From 0614002bb5f7411e61ffa0dfe5be1f2c84df3da3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:24 -0700
Subject: netvm: propagate page->pfmemalloc from skb_alloc_page to skb

The skb->pfmemalloc flag gets set to true iff during the slab allocation
of data in __alloc_skb that the the PFMEMALLOC reserves were used.  If
page splitting is used, it is possible that pages will be allocated from
the PFMEMALLOC reserve without propagating this information to the skb.
This patch propagates page->pfmemalloc from pages allocated for fragments
to the skb.

It works by reintroducing and expanding the skb_alloc_page() API to take
an skb.  If the page was allocated from pfmemalloc reserves, it is
automatically copied.  If the driver allocates the page before the skb, it
should call skb_propagate_pfmemalloc() after the skb is allocated to
ensure the flag is copied properly.

Failure to do so is not critical.  The resulting driver may perform slower
if it is used for swap-over-NBD or swap-over-NFS but it should not result
in failure.

[davem@davemloft.net: API rename and consistency]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/skbuff.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b814bb8fd5ab..7632c87da2c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1774,6 +1774,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+/*
+ *	__skb_alloc_page - allocate pages for ps-rx on a skb and preserve pfmemalloc data
+ *	@gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ *	@skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ *	@order: size of the allocation
+ *
+ * 	Allocate a new page.
+ *
+ * 	%NULL is returned if there is no free memory.
+*/
+static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
+					      struct sk_buff *skb,
+					      unsigned int order)
+{
+	struct page *page;
+
+	gfp_mask |= __GFP_COLD;
+
+	if (!(gfp_mask & __GFP_NOMEMALLOC))
+		gfp_mask |= __GFP_MEMALLOC;
+
+	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+	if (skb && page && page->pfmemalloc)
+		skb->pfmemalloc = true;
+
+	return page;
+}
+
+/**
+ *	__skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
+ *	@gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ *	@skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ *
+ * 	Allocate a new page.
+ *
+ * 	%NULL is returned if there is no free memory.
+ */
+static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
+					     struct sk_buff *skb)
+{
+	return __skb_alloc_pages(gfp_mask, skb, 0);
+}
+
+/**
+ *	skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
+ *	@page: The page that was allocated from skb_alloc_page
+ *	@skb: The skb that may need pfmemalloc set
+ */
+static inline void skb_propagate_pfmemalloc(struct page *page,
+					     struct sk_buff *skb)
+{
+	if (page && page->pfmemalloc)
+		skb->pfmemalloc = true;
+}
+
 /**
  * skb_frag_page - retrieve the page refered to by a paged fragment
  * @frag: the paged fragment
-- 
cgit v1.2.3


From 5515061d22f0f9976ae7815864bfd22042d36848 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:35 -0700
Subject: mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and
 swap is backed by network storage

If swap is backed by network storage such as NBD, there is a risk that a
large number of reclaimers can hang the system by consuming all
PF_MEMALLOC reserves.  To avoid these hangs, the administrator must tune
min_free_kbytes in advance which is a bit fragile.

This patch throttles direct reclaimers if half the PF_MEMALLOC reserves
are in use.  If the system is routinely getting throttled the system
administrator can increase min_free_kbytes so degradation is smoother but
the system will keep running.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 64b2c3a48286..2daa54f55db7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -705,6 +705,7 @@ typedef struct pglist_data {
 					     range, including holes */
 	int node_id;
 	wait_queue_head_t kswapd_wait;
+	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
-- 
cgit v1.2.3


From 68243e76ee343d63c6cf76978588a885951e2818 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:39 -0700
Subject: mm: account for the number of times direct reclaimers get throttled

Under significant pressure when writing back to network-backed storage,
direct reclaimers may get throttled.  This is expected to be a short-lived
event and the processes get woken up again but processes do get stalled.
This patch counts how many times such stalling occurs.  It's up to the
administrator whether to reduce these stalls by increasing
min_free_kbytes.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 06f8e3858251..57f7b1091511 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -30,6 +30,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGSTEAL_DIRECT),
 		FOR_ALL_ZONES(PGSCAN_KSWAPD),
 		FOR_ALL_ZONES(PGSCAN_DIRECT),
+		PGSCAN_DIRECT_THROTTLE,
 #ifdef CONFIG_NUMA
 		PGSCAN_ZONE_RECLAIM_FAILED,
 #endif
-- 
cgit v1.2.3


From f981c5950fa85916ba49bea5d9a7a5078f47e569 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:47 -0700
Subject: mm: methods for teaching filesystems about PG_swapcache pages

In order to teach filesystems to handle swap cache pages, three new page
functions are introduced:

  pgoff_t page_file_index(struct page *);
  loff_t page_file_offset(struct page *);
  struct address_space *page_file_mapping(struct page *);

page_file_index() - gives the offset of this page in the file in
PAGE_CACHE_SIZE blocks.  Like page->index is for mapped pages, this
function also gives the correct index for PG_swapcache pages.

page_file_offset() - uses page_file_index(), so that it will give the
expected result, even for PG_swapcache pages.

page_file_mapping() - gives the mapping backing the actual page; that is
for swap cache pages it will give swap_file->f_mapping.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h      | 25 +++++++++++++++++++++++++
 include/linux/pagemap.h |  5 +++++
 include/linux/swap.h    |  1 +
 3 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c6dfd2faa69..7cdac1676b59 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -805,6 +805,17 @@ static inline void *page_rmapping(struct page *page)
 	return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
 }
 
+extern struct address_space *__page_file_mapping(struct page *);
+
+static inline
+struct address_space *page_file_mapping(struct page *page)
+{
+	if (unlikely(PageSwapCache(page)))
+		return __page_file_mapping(page);
+
+	return page->mapping;
+}
+
 static inline int PageAnon(struct page *page)
 {
 	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
@@ -821,6 +832,20 @@ static inline pgoff_t page_index(struct page *page)
 	return page->index;
 }
 
+extern pgoff_t __page_file_index(struct page *page);
+
+/*
+ * Return the file index of the page. Regular pagecache pages use ->index
+ * whereas swapcache pages use swp_offset(->private)
+ */
+static inline pgoff_t page_file_index(struct page *page)
+{
+	if (unlikely(PageSwapCache(page)))
+		return __page_file_index(page);
+
+	return page->index;
+}
+
 /*
  * Return true if this page is mapped into pagetables.
  */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7cfad3bbb0cc..e42c762f0dc7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -286,6 +286,11 @@ static inline loff_t page_offset(struct page *page)
 	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
 }
 
+static inline loff_t page_file_offset(struct page *page)
+{
+	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
+}
+
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
 				     unsigned long address);
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 9a16bb1cefd1..e62425ded2ed 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -356,6 +356,7 @@ extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
 extern int page_swapcount(struct page *);
+extern struct swap_info_struct *page_swap_info(struct page *);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
-- 
cgit v1.2.3


From 18022c5d8627a7a9ba8097a0f238b513fae6f5b8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:51 -0700
Subject: mm: add get_kernel_page[s] for pinning of kernel addresses for I/O

This patch adds two new APIs get_kernel_pages() and get_kernel_page() that
may be used to pin a vector of kernel addresses for IO.  The initial user
is expected to be NFS for allowing pages to be written to swap using
aops->direct_IO().  Strictly speaking, swap-over-NFS only needs to pin one
page for IO but it makes sense to express the API in terms of a vector and
add a helper for pinning single pages.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Cc: Mark Salter <msalter@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blk_types.h | 2 ++
 include/linux/fs.h        | 2 ++
 include/linux/mm.h        | 4 ++++
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0edb65dd8edd..7b7ac9ccec7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,6 +160,7 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
+	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -201,5 +202,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_KERNEL		(1 << __REQ_KERNEL)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8fabb037a48d..9d77309da153 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,6 +165,8 @@ struct inodes_stat_t {
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
+#define KERNEL_READ		(READ|REQ_KERNEL)
+#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
 
 #define READ_SYNC		(READ | REQ_SYNC)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cdac1676b59..bd079a1b0fdc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1019,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+struct kvec;
+int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
+			struct page **pages);
+int get_kernel_page(unsigned long start, int write, struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
-- 
cgit v1.2.3


From 62c230bc1790923a1b35da03596a68a6c9b5b100 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:55 -0700
Subject: mm: add support for a filesystem to activate swap files and use
 direct_IO for writing swap pages

Currently swapfiles are managed entirely by the core VM by using ->bmap to
allocate space and write to the blocks directly.  This effectively ensures
that the underlying blocks are allocated and avoids the need for the swap
subsystem to locate what physical blocks store offsets within a file.

If the swap subsystem is to use the filesystem information to locate the
blocks, it is critical that information such as block groups, block
bitmaps and the block descriptor table that map the swap file were
resident in memory.  This patch adds address_space_operations that the VM
can call when activating or deactivating swap backed by a file.

  int swap_activate(struct file *);
  int swap_deactivate(struct file *);

The ->swap_activate() method is used to communicate to the file that the
VM relies on it, and the address_space should take adequate measures such
as reserving space in the underlying device, reserving memory for mempools
and pinning information such as the block descriptor table in memory.  The
->swap_deactivate() method is called on sys_swapoff() if ->swap_activate()
returned success.

After a successful swapfile ->swap_activate, the swapfile is marked
SWP_FILE and swapper_space.a_ops will proxy to
sis->swap_file->f_mappings->a_ops using ->direct_io to write swapcache
pages and ->readpage to read.

It is perfectly possible that direct_IO be used to read the swap pages but
it is an unnecessary complication.  Similarly, it is possible that
->writepage be used instead of direct_io to write the pages but filesystem
developers have stated that calling writepage from the VM is undesirable
for a variety of reasons and using direct_IO opens up the possibility of
writing back batches of swap pages in the future.

[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h   | 4 ++++
 include/linux/swap.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d77309da153..38356ab827c9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -638,6 +638,10 @@ struct address_space_operations {
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
 	int (*error_remove_page)(struct address_space *, struct page *);
+
+	/* swapfile support */
+	int (*swap_activate)(struct file *file);
+	int (*swap_deactivate)(struct file *file);
 };
 
 extern const struct address_space_operations empty_aops;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e62425ded2ed..ab230b1ebf61 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -151,6 +151,7 @@ enum {
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
 	SWP_BLKDEV	= (1 << 6),	/* its a block device */
+	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
@@ -320,6 +321,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
+extern int swap_set_page_dirty(struct page *page);
 extern void end_swap_bio_read(struct bio *bio, int err);
 
 /* linux/mm/swap_state.c */
-- 
cgit v1.2.3


From a509bc1a9e487d952d9404318f7f990166ab57a7 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:44:57 -0700
Subject: mm: swap: implement generic handler for swap_activate

The version of swap_activate introduced is sufficient for swap-over-NFS
but would not provide enough information to implement a generic handler.
This patch shuffles things slightly to ensure the same information is
available for aops->swap_activate() as is available to the core.

No functionality change.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h   | 6 ++++--
 include/linux/swap.h | 5 +++++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 38356ab827c9..c8667f8b5358 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -429,6 +429,7 @@ struct kstatfs;
 struct vm_area_struct;
 struct vfsmount;
 struct cred;
+struct swap_info_struct;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -640,8 +641,9 @@ struct address_space_operations {
 	int (*error_remove_page)(struct address_space *, struct page *);
 
 	/* swapfile support */
-	int (*swap_activate)(struct file *file);
-	int (*swap_deactivate)(struct file *file);
+	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
+				sector_t *span);
+	void (*swap_deactivate)(struct file *file);
 };
 
 extern const struct address_space_operations empty_aops;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ab230b1ebf61..388e70601413 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -324,6 +324,11 @@ extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern int swap_set_page_dirty(struct page *page);
 extern void end_swap_bio_read(struct bio *bio, int err);
 
+int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
+		unsigned long nr_pages, sector_t start_block);
+int generic_swapfile_activate(struct swap_info_struct *, struct file *,
+		sector_t *);
+
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
 #define total_swapcache_pages  swapper_space.nrpages
-- 
cgit v1.2.3


From 5a178119b0fbe37f7dfb602b37df9cc4b1dc9d71 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:45:02 -0700
Subject: mm: add support for direct_IO to highmem pages

The patch "mm: add support for a filesystem to activate swap files and use
direct_IO for writing swap pages" added support for using direct_IO to
write swap pages but it is insufficient for highmem pages.

To support highmem pages, this patch kmaps() the page before calling the
direct_IO() handler.  As direct_IO deals with virtual addresses an
additional helper is necessary for get_kernel_pages() to lookup the struct
page for a kmap virtual address.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 774fa47b3b5b..ef788b5b4a35 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -39,10 +39,17 @@ extern unsigned long totalhigh_pages;
 
 void kmap_flush_unused(void);
 
+struct page *kmap_to_page(void *addr);
+
 #else /* CONFIG_HIGHMEM */
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+static inline struct page *kmap_to_page(void *addr)
+{
+	return virt_to_page(addr);
+}
+
 #define totalhigh_pages 0UL
 
 #ifndef ARCH_HAS_KMAP
-- 
cgit v1.2.3


From a564b8f0398636ba30b07c0eaebdef7ff7837249 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:45:12 -0700
Subject: nfs: enable swap on NFS

Implement the new swapfile a_ops for NFS and hook up ->direct_IO.  This
will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under
PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol
->connect() method.

PF_MEMALLOC should allow the allocation of struct socket and related
objects and the early (re)setting of SOCK_MEMALLOC should allow us to
receive the packets required for the TCP connection buildup.

[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfs_fs.h      | 4 ++--
 include/linux/sunrpc/xprt.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4b6043c20f77..35994f975a7f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -473,10 +473,10 @@ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
 			unsigned long);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
 			const struct iovec *iov, unsigned long nr_segs,
-			loff_t pos);
+			loff_t pos, bool uio);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
 			const struct iovec *iov, unsigned long nr_segs,
-			loff_t pos);
+			loff_t pos, bool uio);
 
 /*
  * linux/fs/nfs/dir.c
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 77d278defa70..cff40aa7db62 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -174,6 +174,8 @@ struct rpc_xprt {
 	unsigned long		state;		/* transport state */
 	unsigned char		shutdown   : 1,	/* being shut down */
 				resvport   : 1; /* use a reserved port */
+	unsigned int		swapper;	/* we're swapping over this
+						   transport */
 	unsigned int		bind_index;	/* bind function index */
 
 	/*
@@ -316,6 +318,7 @@ void			xprt_release_rqst_cong(struct rpc_task *task);
 void			xprt_disconnect_done(struct rpc_xprt *xprt);
 void			xprt_force_disconnect(struct rpc_xprt *xprt);
 void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
+int			xs_swapper(struct rpc_xprt *xprt, int enable);
 
 /*
  * Reserved bit positions in xprt->state
-- 
cgit v1.2.3


From 0030f535a5cf9b1841d2088c10a0b2f8f2987460 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 31 Jul 2012 16:45:25 -0700
Subject: mm: memcg: fix compaction/migration failing due to memcg limits

Compaction (and page migration in general) can currently be hindered
through pages being owned by memory cgroups that are at their limits and
unreclaimable.

The reason is that the replacement page is being charged against the limit
while the page being replaced is also still charged.  But this seems
unnecessary, given that only one of the two pages will still be in use
after migration finishes.

This patch changes the memcg migration sequence so that the replacement
page is not charged.  Whatever page is still in use after successful or
failed migration gets to keep the charge of the page that was going to be
replaced.

The replacement page will still show up temporarily in the rss/cache
statistics, this can be fixed in a later patch as it's less urgent.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wanpeng Li <liwp.linux@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5a3ee6423634..8d9489fdab2e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -98,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
-extern int
-mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
+extern void
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+			     struct mem_cgroup **memcgp);
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
@@ -276,11 +276,10 @@ static inline struct cgroup_subsys_state
 	return NULL;
 }
 
-static inline int
+static inline void
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-	struct mem_cgroup **memcgp, gfp_t gfp_mask)
+			     struct mem_cgroup **memcgp)
 {
-	return 0;
 }
 
 static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-- 
cgit v1.2.3


From 6527af5d1bea219d64095a5e30c1b1e0868aae16 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 31 Jul 2012 16:46:16 -0700
Subject: mm: remove redundant initialization

pg_data_t is zeroed before reaching free_area_init_core(), so remove the
now unnecessary initializations.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 65efb92da996..ad2cfd53dadc 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -179,11 +179,6 @@ extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
 #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
 #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
 
-static inline void zap_zone_vm_stats(struct zone *zone)
-{
-	memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
-}
-
 extern void inc_zone_state(struct zone *, enum zone_stat_item);
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From d833352a4338dc31295ed832a30c9ccff5c7a183 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 31 Jul 2012 16:46:20 -0700
Subject: mm: hugetlbfs: close race during teardown of hugetlbfs shared page
 tables

If a process creates a large hugetlbfs mapping that is eligible for page
table sharing and forks heavily with children some of whom fault and
others which destroy the mapping then it is possible for page tables to
get corrupted.  Some teardowns of the mapping encounter a "bad pmd" and
output a message to the kernel log.  The final teardown will trigger a
BUG_ON in mm/filemap.c.

This was reproduced in 3.4 but is known to have existed for a long time
and goes back at least as far as 2.6.37.  It was probably was introduced
in 2.6.20 by [39dde65c: shared page table for hugetlb page].  The messages
look like this;

[  ..........] Lots of bad pmd messages followed by this
[  127.164256] mm/memory.c:391: bad pmd ffff880412e04fe8(80000003de4000e7).
[  127.164257] mm/memory.c:391: bad pmd ffff880412e04ff0(80000003de6000e7).
[  127.164258] mm/memory.c:391: bad pmd ffff880412e04ff8(80000003de0000e7).
[  127.186778] ------------[ cut here ]------------
[  127.186781] kernel BUG at mm/filemap.c:134!
[  127.186782] invalid opcode: 0000 [#1] SMP
[  127.186783] CPU 7
[  127.186784] Modules linked in: af_packet cpufreq_conservative cpufreq_userspace cpufreq_powersave acpi_cpufreq mperf ext3 jbd dm_mod coretemp crc32c_intel usb_storage ghash_clmulni_intel aesni_intel i2c_i801 r8169 mii uas sr_mod cdrom sg iTCO_wdt iTCO_vendor_support shpchp serio_raw cryptd aes_x86_64 e1000e pci_hotplug dcdbas aes_generic container microcode ext4 mbcache jbd2 crc16 sd_mod crc_t10dif i915 drm_kms_helper drm i2c_algo_bit ehci_hcd ahci libahci usbcore rtc_cmos usb_common button i2c_core intel_agp video intel_gtt fan processor thermal thermal_sys hwmon ata_generic pata_atiixp libata scsi_mod
[  127.186801]
[  127.186802] Pid: 9017, comm: hugetlbfs-test Not tainted 3.4.0-autobuild #53 Dell Inc. OptiPlex 990/06D7TR
[  127.186804] RIP: 0010:[<ffffffff810ed6ce>]  [<ffffffff810ed6ce>] __delete_from_page_cache+0x15e/0x160
[  127.186809] RSP: 0000:ffff8804144b5c08  EFLAGS: 00010002
[  127.186810] RAX: 0000000000000001 RBX: ffffea000a5c9000 RCX: 00000000ffffffc0
[  127.186811] RDX: 0000000000000000 RSI: 0000000000000009 RDI: ffff88042dfdad00
[  127.186812] RBP: ffff8804144b5c18 R08: 0000000000000009 R09: 0000000000000003
[  127.186813] R10: 0000000000000000 R11: 000000000000002d R12: ffff880412ff83d8
[  127.186814] R13: ffff880412ff83d8 R14: 0000000000000000 R15: ffff880412ff83d8
[  127.186815] FS:  00007fe18ed2c700(0000) GS:ffff88042dce0000(0000) knlGS:0000000000000000
[  127.186816] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  127.186817] CR2: 00007fe340000503 CR3: 0000000417a14000 CR4: 00000000000407e0
[  127.186818] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  127.186819] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  127.186820] Process hugetlbfs-test (pid: 9017, threadinfo ffff8804144b4000, task ffff880417f803c0)
[  127.186821] Stack:
[  127.186822]  ffffea000a5c9000 0000000000000000 ffff8804144b5c48 ffffffff810ed83b
[  127.186824]  ffff8804144b5c48 000000000000138a 0000000000001387 ffff8804144b5c98
[  127.186825]  ffff8804144b5d48 ffffffff811bc925 ffff8804144b5cb8 0000000000000000
[  127.186827] Call Trace:
[  127.186829]  [<ffffffff810ed83b>] delete_from_page_cache+0x3b/0x80
[  127.186832]  [<ffffffff811bc925>] truncate_hugepages+0x115/0x220
[  127.186834]  [<ffffffff811bca43>] hugetlbfs_evict_inode+0x13/0x30
[  127.186837]  [<ffffffff811655c7>] evict+0xa7/0x1b0
[  127.186839]  [<ffffffff811657a3>] iput_final+0xd3/0x1f0
[  127.186840]  [<ffffffff811658f9>] iput+0x39/0x50
[  127.186842]  [<ffffffff81162708>] d_kill+0xf8/0x130
[  127.186843]  [<ffffffff81162812>] dput+0xd2/0x1a0
[  127.186845]  [<ffffffff8114e2d0>] __fput+0x170/0x230
[  127.186848]  [<ffffffff81236e0e>] ? rb_erase+0xce/0x150
[  127.186849]  [<ffffffff8114e3ad>] fput+0x1d/0x30
[  127.186851]  [<ffffffff81117db7>] remove_vma+0x37/0x80
[  127.186853]  [<ffffffff81119182>] do_munmap+0x2d2/0x360
[  127.186855]  [<ffffffff811cc639>] sys_shmdt+0xc9/0x170
[  127.186857]  [<ffffffff81410a39>] system_call_fastpath+0x16/0x1b
[  127.186858] Code: 0f 1f 44 00 00 48 8b 43 08 48 8b 00 48 8b 40 28 8b b0 40 03 00 00 85 f6 0f 88 df fe ff ff 48 89 df e8 e7 cb 05 00 e9 d2 fe ff ff <0f> 0b 55 83 e2 fd 48 89 e5 48 83 ec 30 48 89 5d d8 4c 89 65 e0
[  127.186868] RIP  [<ffffffff810ed6ce>] __delete_from_page_cache+0x15e/0x160
[  127.186870]  RSP <ffff8804144b5c08>
[  127.186871] ---[ end trace 7cbac5d1db69f426 ]---

The bug is a race and not always easy to reproduce.  To reproduce it I was
doing the following on a single socket I7-based machine with 16G of RAM.

$ hugeadm --pool-pages-max DEFAULT:13G
$ echo $((18*1048576*1024)) > /proc/sys/kernel/shmmax
$ echo $((18*1048576*1024)) > /proc/sys/kernel/shmall
$ for i in `seq 1 9000`; do ./hugetlbfs-test; done

On my particular machine, it usually triggers within 10 minutes but
enabling debug options can change the timing such that it never hits.
Once the bug is triggered, the machine is in trouble and needs to be
rebooted.  The machine will respond but processes accessing proc like "ps
aux" will hang due to the BUG_ON.  shutdown will also hang and needs a
hard reset or a sysrq-b.

The basic problem is a race between page table sharing and teardown.  For
the most part page table sharing depends on i_mmap_mutex.  In some cases,
it is also taking the mm->page_table_lock for the PTE updates but with
shared page tables, it is the i_mmap_mutex that is more important.

Unfortunately it appears to be also insufficient. Consider the following
situation

Process A					Process B
---------					---------
hugetlb_fault					shmdt
  						LockWrite(mmap_sem)
    						  do_munmap
						    unmap_region
						      unmap_vmas
						        unmap_single_vma
						          unmap_hugepage_range
      						            Lock(i_mmap_mutex)
							    Lock(mm->page_table_lock)
							    huge_pmd_unshare/unmap tables <--- (1)
							    Unlock(mm->page_table_lock)
      						            Unlock(i_mmap_mutex)
  huge_pte_alloc				      ...
    Lock(i_mmap_mutex)				      ...
    vma_prio_walk, find svma, spte		      ...
    Lock(mm->page_table_lock)			      ...
    share spte					      ...
    Unlock(mm->page_table_lock)			      ...
    Unlock(i_mmap_mutex)			      ...
  hugetlb_no_page									  <--- (2)
						      free_pgtables
						        unlink_file_vma
							hugetlb_free_pgd_range
						    remove_vma_list

In this scenario, it is possible for Process A to share page tables with
Process B that is trying to tear them down.  The i_mmap_mutex on its own
does not prevent Process A walking Process B's page tables.  At (1) above,
the page tables are not shared yet so it unmaps the PMDs.  Process A sets
up page table sharing and at (2) faults a new entry.  Process B then trips
up on it in free_pgtables.

This patch fixes the problem by adding a new function
__unmap_hugepage_range_final that is only called when the VMA is about to
be destroyed.  This function clears VM_MAYSHARE during
unmap_hugepage_range() under the i_mmap_mutex.  This makes the VMA
ineligible for sharing and avoids the race.  Superficially this looks like
it would then be vunerable to truncate and madvise issues but hugetlbfs
has its own truncate handlers so does not use unmap_mapping_range() and
does not support madvise(DONTNEED).

This should be treated as a -stable candidate if it is merged.

Test program is as follows. The test case was mostly written by Michal
Hocko with a few minor changes to reproduce this bug.

==== CUT HERE ====

static size_t huge_page_size = (2UL << 20);
static size_t nr_huge_page_A = 512;
static size_t nr_huge_page_B = 5632;

unsigned int get_random(unsigned int max)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	srandom(tv.tv_usec);
	return random() % max;
}

static void play(void *addr, size_t size)
{
	unsigned char *start = addr,
		      *end = start + size,
		      *a;
	start += get_random(size/2);

	/* we could itterate on huge pages but let's give it more time. */
	for (a = start; a < end; a += 4096)
		*a = 0;
}

int main(int argc, char **argv)
{
	key_t key = IPC_PRIVATE;
	size_t sizeA = nr_huge_page_A * huge_page_size;
	size_t sizeB = nr_huge_page_B * huge_page_size;
	int shmidA, shmidB;
	void *addrA = NULL, *addrB = NULL;
	int nr_children = 300, n = 0;

	if ((shmidA = shmget(key, sizeA, IPC_CREAT|SHM_HUGETLB|0660)) == -1) {
		perror("shmget:");
		return 1;
	}

	if ((addrA = shmat(shmidA, addrA, SHM_R|SHM_W)) == (void *)-1UL) {
		perror("shmat");
		return 1;
	}
	if ((shmidB = shmget(key, sizeB, IPC_CREAT|SHM_HUGETLB|0660)) == -1) {
		perror("shmget:");
		return 1;
	}

	if ((addrB = shmat(shmidB, addrB, SHM_R|SHM_W)) == (void *)-1UL) {
		perror("shmat");
		return 1;
	}

fork_child:
	switch(fork()) {
		case 0:
			switch (n%3) {
			case 0:
				play(addrA, sizeA);
				break;
			case 1:
				play(addrB, sizeB);
				break;
			case 2:
				break;
			}
			break;
		case -1:
			perror("fork:");
			break;
		default:
			if (++n < nr_children)
				goto fork_child;
			play(addrA, sizeA);
			break;
	}
	shmdt(addrA);
	shmdt(addrB);
	do {
		wait(NULL);
	} while (--n > 0);
	shmctl(shmidA, IPC_RMID, NULL);
	shmctl(shmidB, IPC_RMID, NULL);
	return 0;
}

[akpm@linux-foundation.org: name the declaration's args, fix CONFIG_HUGETLBFS=n build]
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f9db20bfa9fc..225164842ab6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -48,6 +48,10 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			  unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+			  struct vm_area_struct *vma,
+			  unsigned long start, unsigned long end,
+			  struct page *ref_page);
 void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 				unsigned long start, unsigned long end,
 				struct page *ref_page);
@@ -130,6 +134,13 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
 
 #define hugetlb_change_protection(vma, address, end, newprot)
 
+static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+			struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct page *ref_page)
+{
+	BUG();
+}
+
 static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
 			unsigned long end, struct page *ref_page)
-- 
cgit v1.2.3


From 392a325c4351339cfbf182bb5a1444df1cf65dbb Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Tue, 10 Jul 2012 19:31:51 -0700
Subject: Platform: OLPC: add a stub to drivers/platform/ for the OLPC EC
 driver

The OLPC EC driver has outgrown arch/x86/platform/.  It's time to both
share common code amongst different architectures, as well as move it out
of arch/x86/.  The XO-1.75 is ARM-based, and the EC driver shares a lot of
code with the x86 code.

Signed-off-by: Andres Salomon <dilinger@queued.net>
Acked-by: Paul Fox <pgf@laptop.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/olpc-ec.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 include/linux/olpc-ec.h

(limited to 'include/linux')

diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h
new file mode 100644
index 000000000000..6d4e426d9fdc
--- /dev/null
+++ b/include/linux/olpc-ec.h
@@ -0,0 +1,29 @@
+#ifndef _LINUX_OLPC_EC_H
+#define _LINUX_OLPC_EC_H
+
+/* XO-1 EC commands */
+#define EC_FIRMWARE_REV			0x08
+#define EC_WRITE_SCI_MASK		0x1b
+#define EC_WAKE_UP_WLAN			0x24
+#define EC_WLAN_LEAVE_RESET		0x25
+#define EC_READ_EB_MODE			0x2a
+#define EC_SET_SCI_INHIBIT		0x32
+#define EC_SET_SCI_INHIBIT_RELEASE	0x34
+#define EC_WLAN_ENTER_RESET		0x35
+#define EC_WRITE_EXT_SCI_MASK		0x38
+#define EC_SCI_QUERY			0x84
+#define EC_EXT_SCI_QUERY		0x85
+
+#ifdef CONFIG_OLPC
+
+extern int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+		size_t outlen);
+
+#else
+
+static inline int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+		size_t outlen) { return -ENODEV; }
+
+#endif /* CONFIG_OLPC */
+
+#endif /* _LINUX_OLPC_EC_H */
-- 
cgit v1.2.3


From 3d26c20bae9e97c98f7240184427d3a38515d406 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Wed, 11 Jul 2012 17:40:25 -0700
Subject: Platform: OLPC: allow EC cmd to be overridden, and create a workqueue
 to call it

This provides a new API allows different OLPC architectures to override the
EC driver.  x86 and ARM OLPC machines use completely different EC backends.

The olpc_ec_cmd is synchronous, and waits for the workqueue to send the
command to the EC.  Multiple callers can run olpc_ec_cmd() at once, and
they will by serialized and sleep while only one executes on the EC at a time.

We don't provide an unregister function, as that doesn't make sense within
the context of OLPC machines - there's only ever 1 EC, it's critical to
functionality, and it certainly not hotpluggable.

Signed-off-by: Andres Salomon <dilinger@queued.net>
Acked-by: Paul Fox <pgf@laptop.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/olpc-ec.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h
index 6d4e426d9fdc..231e96f5dfe2 100644
--- a/include/linux/olpc-ec.h
+++ b/include/linux/olpc-ec.h
@@ -14,8 +14,14 @@
 #define EC_SCI_QUERY			0x84
 #define EC_EXT_SCI_QUERY		0x85
 
+struct olpc_ec_driver {
+	int (*ec_cmd)(u8, u8 *, size_t, u8 *, size_t, void *);
+};
+
 #ifdef CONFIG_OLPC
 
+extern void olpc_ec_driver_register(struct olpc_ec_driver *drv, void *arg);
+
 extern int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
 		size_t outlen);
 
-- 
cgit v1.2.3


From ac2504151f5af27bbf0c0362b7da5951e05dfc43 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Fri, 13 Jul 2012 05:57:17 -0700
Subject: Platform: OLPC: turn EC driver into a platform_driver

The 1.75-based OLPC EC driver already does this; let's do it for all EC
drivers.  This gives us nice suspend/resume hooks, amongst other things.

We want to run the EC's suspend hooks later than other drivers (which may
be setting wakeup masks or be running EC commands).  We also want to run
the EC's resume hooks earlier than other drivers (which may want to run EC
commands).

Signed-off-by: Andres Salomon <dilinger@queued.net>
Acked-by: Paul Fox <pgf@laptop.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/olpc-ec.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h
index 231e96f5dfe2..5bb6e760aa61 100644
--- a/include/linux/olpc-ec.h
+++ b/include/linux/olpc-ec.h
@@ -14,7 +14,13 @@
 #define EC_SCI_QUERY			0x84
 #define EC_EXT_SCI_QUERY		0x85
 
+struct platform_device;
+
 struct olpc_ec_driver {
+	int (*probe)(struct platform_device *);
+	int (*suspend)(struct platform_device *);
+	int (*resume)(struct platform_device *);
+
 	int (*ec_cmd)(u8, u8 *, size_t, u8 *, size_t, void *);
 };
 
-- 
cgit v1.2.3


From 4f46f8ac80416b0e8fd3aba6a0d842205fb29140 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 30 Jul 2012 21:28:27 +0200
Subject: dmaengine: shdma: restore partial transfer calculation

The recent shdma driver split has mistakenly removed support for partial
DMA transfer size calculation on forced termination. This patch restores
it.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Vinod Koul <vinod.koul@linux.intel.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/shdma-base.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index 93f9821554b6..a3728bf66f0e 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -50,6 +50,7 @@ struct shdma_desc {
 	struct list_head node;
 	struct dma_async_tx_descriptor async_tx;
 	enum dma_transfer_direction direction;
+	size_t partial;
 	dma_cookie_t cookie;
 	int chunks;
 	int mark;
@@ -98,6 +99,7 @@ struct shdma_ops {
 	void (*start_xfer)(struct shdma_chan *, struct shdma_desc *);
 	struct shdma_desc *(*embedded_desc)(void *, int);
 	bool (*chan_irq)(struct shdma_chan *, int);
+	size_t (*get_partial)(struct shdma_chan *, struct shdma_desc *);
 };
 
 struct shdma_dev {
-- 
cgit v1.2.3


From c83f6bf98dc1f1a194118b3830706cebbebda8c4 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 1 Aug 2012 12:24:18 +0200
Subject: block: add partition resize function to blkpg ioctl

Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.

This patch converts hd_struct->nr_sects into sequence counter because
One might extend a partition while IO is happening to it and update of
nr_sects can be non-atomic on 32bit machines with 64bit sector_t. This
can lead to issues like reading inconsistent size of a partition. Sequence
counter have been used so that readers don't have to take bdev mutex lock
as we call sector_in_part() very frequently.

Now all the access to hd_struct->nr_sects should happen using sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoritically race should exist there too but this patch does not
modify set_capacity()/get_capacity() due to sheer number of call sites
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t set_capacity()/get_capacity().

v2: Add CONFIG_LBDAF test to UP preempt case as suggested by Phillip.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Phillip Susi <psusi@ubuntu.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkpg.h |  1 +
 include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index faf8a45af210..a8519446c111 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
 /* The subfunctions (for the op field) */
 #define BLKPG_ADD_PARTITION	1
 #define BLKPG_DEL_PARTITION	2
+#define BLKPG_RESIZE_PARTITION	3
 
 /* Sizes of name fields. Unused at present. */
 #define BLKPG_DEVNAMELTH	64
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 017a7fb5a1fc..b88723b81b3d 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,13 @@ struct partition_meta_info {
 
 struct hd_struct {
 	sector_t start_sect;
+	/*
+	 * nr_sects is protected by sequence counter. One might extend a
+	 * partition while IO is happening to it and update of nr_sects
+	 * can be non-atomic on 32bit machines with 64bit sector_t.
+	 */
 	sector_t nr_sects;
+	seqcount_t nr_sects_seq;
 	sector_t alignment_offset;
 	unsigned int discard_alignment;
 	struct device __dev;
@@ -648,6 +654,57 @@ static inline void hd_struct_put(struct hd_struct *part)
 		__delete_partition(part);
 }
 
+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+	sector_t nr_sects;
+	unsigned seq;
+	do {
+		seq = read_seqcount_begin(&part->nr_sects_seq);
+		nr_sects = part->nr_sects;
+	} while (read_seqcount_retry(&part->nr_sects_seq, seq));
+	return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+	sector_t nr_sects;
+
+	preempt_disable();
+	nr_sects = part->nr_sects;
+	preempt_enable();
+	return nr_sects;
+#else
+	return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exlusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+	write_seqcount_begin(&part->nr_sects_seq);
+	part->nr_sects = size;
+	write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+	preempt_disable();
+	part->nr_sects = size;
+	preempt_enable();
+#else
+	part->nr_sects = size;
+#endif
+}
+
 #else /* CONFIG_BLOCK */
 
 static inline void printk_all_partitions(void) { }
-- 
cgit v1.2.3


From 068535f1fef4c90aee23eb7b9b9a71c5b72d7cd0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 1 Aug 2012 07:56:16 -0400
Subject: locks: remove unused lm_release_private

In commit 3b6e2723f32d ("locks: prevent side-effects of
locks_release_private before file_lock is initialized") we removed the
last user of lm_release_private without removing the field itself.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7eed5b98ae2..4ba5c8715523 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1162,7 +1162,6 @@ struct lock_manager_operations {
 	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
 	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
-	void (*lm_release_private)(struct file_lock *);
 	void (*lm_break)(struct file_lock *);
 	int (*lm_change)(struct file_lock **, int);
 };
-- 
cgit v1.2.3


From 30b678d844af3305cda5953467005cebb5d7b687 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 30 Jul 2012 15:57:00 +0000
Subject: net: Allow driver to limit number of GSO segments per skb

A peer (or local user) may cause TCP to use a nominal MSS of as little
as 88 (actual MSS of 76 with timestamps).  Given that we have a
sufficiently prodigious local sender and the peer ACKs quickly enough,
it is nevertheless possible to grow the window for such a connection
to the point that we will try to send just under 64K at once.  This
results in a single skb that expands to 861 segments.

In some drivers with TSO support, such an skb will require hundreds of
DMA descriptors; a substantial fraction of a TX ring or even more than
a full ring.  The TX queue selected for the skb may stall and trigger
the TX watchdog repeatedly (since the problem skb will be retried
after the TX reset).  This particularly affects sfc, for which the
issue is designated as CVE-2012-3412.

Therefore:
1. Add the field net_device::gso_max_segs holding the device-specific
   limit.
2. In netif_skb_features(), if the number of segments is too high then
   mask out GSO features to force fall back to software GSO.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eb06e58bed0b..a9db4f33407f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1300,6 +1300,8 @@ struct net_device {
 	/* for setting kernel sock attribute on TCP connection setup */
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
+#define GSO_MAX_SEGS		65535
+	u16			gso_max_segs;
 
 #ifdef CONFIG_DCB
 	/* Data Center Bridging netlink ops */
-- 
cgit v1.2.3


From c6e666345e1b79c62ba82339cc7d55a89cb73f88 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 2 Aug 2012 09:48:50 +0200
Subject: block: split discard into aligned requests

When a disk has large discard_granularity and small max_discard_sectors,
discards are not split with optimal alignment.  In the limit case of
discard_granularity == max_discard_sectors, no request could be aligned
correctly, so in fact you might end up with no discarded logical blocks
at all.

Another example that helps showing the condition in the patch is with
discard_granularity == 64, max_discard_sectors == 128.  A request that is
submitted for 256 sectors 2..257 will be split in two: 2..129, 130..257.
However, only 2 aligned blocks out of 3 are included in the request;
128..191 may be left intact and not discarded.  With this patch, the
first request will be truncated to ensure good alignment of what's left,
and the split will be 2..127, 128..255, 256..257.  The patch will also
take into account the discard_alignment.

At most one extra request will be introduced, because the first request
will be reduced by at most granularity-1 sectors, and granularity
must be less than max_discard_sectors.  Subsequent requests will run
on round_down(max_discard_sectors, granularity) sectors, as in the
current code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4e72a9d48232..281516ae8b4e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1139,6 +1139,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 		& (lim->discard_granularity - 1);
 }
 
+static inline int bdev_discard_alignment(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (bdev != bdev->bd_contains)
+		return bdev->bd_part->discard_alignment;
+
+	return q->limits.discard_alignment;
+}
+
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
 	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
-- 
cgit v1.2.3


From c263c2c1ad615e935d563cd7be11d417f94895d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Mon, 23 Jul 2012 18:20:12 +0200
Subject: bcma: BCM43228 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_driver_chipcommon.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 3c80885fa829..d323a4b4143c 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -89,6 +89,12 @@
 #define  BCMA_CC_CHIPST_4313_OTP_PRESENT	2
 #define  BCMA_CC_CHIPST_4331_SPROM_PRESENT	2
 #define  BCMA_CC_CHIPST_4331_OTP_PRESENT	4
+#define  BCMA_CC_CHIPST_43228_ILP_DIV_EN	0x00000001
+#define  BCMA_CC_CHIPST_43228_OTP_PRESENT	0x00000002
+#define  BCMA_CC_CHIPST_43228_SERDES_REFCLK_PADSEL	0x00000004
+#define  BCMA_CC_CHIPST_43228_SDIO_MODE		0x00000008
+#define  BCMA_CC_CHIPST_43228_SDIO_OTP_PRESENT	0x00000010
+#define  BCMA_CC_CHIPST_43228_SDIO_RESET	0x00000020
 #define  BCMA_CC_CHIPST_4706_PKG_OPTION		BIT(0) /* 0: full-featured package 1: low-cost package */
 #define  BCMA_CC_CHIPST_4706_SFLASH_PRESENT	BIT(1) /* 0: parallel, 1: serial flash is present */
 #define  BCMA_CC_CHIPST_4706_SFLASH_TYPE	BIT(2) /* 0: 8b-p/ST-s flash, 1: 16b-p/Atmal-s flash */
-- 
cgit v1.2.3


From f6166384095b7ecf77752b5e9096e6d03d75f7ae Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Thu, 2 Aug 2012 15:36:09 +0300
Subject: NFS41: add pg_layout_private to nfs_pageio_descriptor

To allow layout driver to pass private information around
pg_init/pg_doio.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 1 +
 include/linux/nfs_xdr.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 880805774f9f..92ce5783b707 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -69,6 +69,7 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
+	void			*pg_layout_private;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00485e084394..ac7c8ae254f2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1248,6 +1248,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	struct nfs_direct_req	*dreq;
+	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
cgit v1.2.3


From 85b9f66a41eb8ee3f1dfc95707412705463cdd97 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 2 Aug 2012 23:42:04 +0200
Subject: block: Add blk_bio_map_sg() helper

Add a helper to map a bio to a scatterlist, modelled after
blk_rq_map_sg.

This helper is useful for any driver that wants to create
a scatterlist from its ->make_request_fn method.

Changes in v2:
 - Use __blk_segment_map_sg to avoid duplicated code
 - Add cocbook style function comment

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 281516ae8b4e..dc632975d54f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -894,6 +894,8 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
+extern int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
+			  struct scatterlist *sglist);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
 
-- 
cgit v1.2.3


From 095adbb6441172985f5ddc3b9e88cb3191bdeac4 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 31 Jul 2012 17:41:09 +0200
Subject: ACPI: Only count valid srat memory structures

Otherwise you could run into:
WARN_ON in numa_register_memblks(), because node_possible_map is zero

References: https://bugzilla.novell.com/show_bug.cgi?id=757888

On this machine (ProLiant ML570 G3) the SRAT table contains:
  - No processor affinities
  - One memory affinity structure (which is set disabled)

CC: Per Jessen <per@opensuse.org>
CC: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3ad510b25283..4f2a76224509 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -96,7 +96,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 void acpi_numa_slit_init (struct acpi_table_slit *slit);
 void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
 void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
-void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
+int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
 void acpi_numa_arch_fixup(void);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
-- 
cgit v1.2.3


From 76582d0a68ab6c46987dd678165c26ad81c44cdb Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Wed, 25 Jul 2012 16:24:49 +0200
Subject: iommu: Include linux/types.h

The linux/iommu.h header uses types defined in linux/types.h but doesn't
include it.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 54d6d690073c..0a85199191b9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -20,6 +20,7 @@
 #define __LINUX_IOMMU_H
 
 #include <linux/errno.h>
+#include <linux/types.h>
 
 #define IOMMU_READ	(1)
 #define IOMMU_WRITE	(2)
-- 
cgit v1.2.3


From ba1eabfade3272596000bf3c62b68ca375e48b08 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 3 Aug 2012 15:55:41 +0200
Subject: iommu: Add missing forward declaration in include file

The 'struct notifier_block' is not used in linux/iommu.h but
not declared anywhere. Add a forward declaration for it.

Reported-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0a85199191b9..7e83370e6fd2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,7 @@ struct iommu_group;
 struct bus_type;
 struct device;
 struct iommu_domain;
+struct notifier_block;
 
 /* iommu fault flags */
 #define IOMMU_FAULT_READ	0x0
-- 
cgit v1.2.3


From f0cd2dbb6cf387c11f87265462e370bb5469299e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 25 Jul 2012 18:11:59 +0300
Subject: vfs: kill write_super and sync_supers

Finally we can kill the 'sync_supers' kernel thread along with the
'->write_super()' superblock operation because all the users are gone.
Now every file-system is supposed to self-manage own superblock and
its dirty state.

The nice thing about killing this thread is that it improves power management.
Indeed, 'sync_supers' is a source of monotonic system wake-ups - it woke up
every 5 seconds no matter what - even if there were no dirty superblocks and
even if there were no file-systems using this service (e.g., btrfs and
journalled ext4 do not need it). So it was wasting power most of the time. And
because the thread was in the core of the kernel, all systems had to have it.
So I am quite happy to make it go away.

Interestingly, this thread is a left-over from the pdflush kernel thread which
was a self-forking kernel thread responsible for all the write-back in old
Linux kernels. It was turned into per-block device BDI threads, and
'sync_supers' was a left-over. Thus, R.I.P, pdflush as well.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/backing-dev.h | 1 -
 include/linux/fs.h          | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c97c6b9cd38e..2a9a9abc9126 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -124,7 +124,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
-void bdi_arm_supers_timer(void);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 38dba16c4176..aa110476a95b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1491,7 +1491,6 @@ struct sb_writers {
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
 	dev_t			s_dev;		/* search index; _not_ kdev_t */
-	unsigned char		s_dirt;
 	unsigned char		s_blocksize_bits;
 	unsigned long		s_blocksize;
 	loff_t			s_maxbytes;	/* Max file size */
@@ -1861,7 +1860,6 @@ struct super_operations {
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
-	void (*write_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
 	int (*freeze_fs) (struct super_block *);
 	int (*unfreeze_fs) (struct super_block *);
@@ -2397,7 +2395,6 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
-extern void sync_supers(void);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3


From 0d5c3eba2e1e5aa74e097f49bc90b58f607e101c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 25 Jul 2012 18:12:08 +0300
Subject: vfs: nuke pdflush from comments

The pdflush thread is long gone, so this patch removes references to pdflush
from vfs comments.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/writeback.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c66fe3332d83..50c3e8fa06a8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -104,7 +104,6 @@ static inline void wait_on_inode(struct inode *inode)
 	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
 
-
 /*
  * mm/page-writeback.c
  */
-- 
cgit v1.2.3


From d796c52ef0b71a988364f6109aeb63d79c5b116b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 5 Aug 2012 19:04:57 -0400
Subject: ext4: make sure the journal sb is written in ext4_clear_journal_err()

After we transfer set the EXT4_ERROR_FS bit in the file system
superblock, it's not enough to call jbd2_journal_clear_err() to clear
the error indication from journal superblock --- we need to call
jbd2_journal_update_sb_errno() as well.  Otherwise, when the root file
system is mounted read-only, the journal is replayed, and the error
indicator is transferred to the superblock --- but the s_errno field
in the jbd2 superblock is left set (since although we cleared it in
memory, we never flushed it out to disk).

This can end up confusing e2fsck.  We should make e2fsck more robust
in this case, but the kernel shouldn't be leaving things in this
confused state, either.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 include/linux/jbd2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f334c7fab967..3efc43f3f162 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1125,6 +1125,7 @@ extern int	   jbd2_journal_destroy    (journal_t *);
 extern int	   jbd2_journal_recover    (journal_t *journal);
 extern int	   jbd2_journal_wipe       (journal_t *, int);
 extern int	   jbd2_journal_skip_recovery	(journal_t *);
+extern void	   jbd2_journal_update_sb_errno(journal_t *);
 extern void	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
 				unsigned long, int);
 extern void	   __jbd2_journal_abort_hard	(journal_t *);
-- 
cgit v1.2.3


From 5d299f3d3c8a2fbc732b1bf03af36333ccec3130 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Aug 2012 05:09:33 +0000
Subject: net: ipv6: fix TCP early demux

IPv6 needs a cookie in dst_check() call.

We need to add rx_dst_cookie and provide a family independent
sk_rx_dst_set(sk, skb) method to properly support IPv6 TCP early demux.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 379e433e15e0..879db26ec401 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -369,6 +369,7 @@ struct ipv6_pinfo {
 	__u8			rcv_tclass;
 
 	__u32			dst_cookie;
+	__u32			rx_dst_cookie;
 
 	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
-- 
cgit v1.2.3


From 035534ed3377d9def2c17717899fd64a111a785b Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 6 Aug 2012 18:02:29 +0200
Subject: canfd: remove redundant CAN FD flag

The first idea of the CAN FD implementation started with a new struct
canfd_frame to be used for both CAN FD frames and legacy CAN frames.
The now mainlined implementation supports both CAN frame types simultaneously
and distinguishes them only by their required sizes: CAN_MTU and CANFD_MTU.

Only the struct canfd_frame contains a flags element which is needed for the
additional CAN FD information. As CAN FD implicitly means that the 'Extened
Data Length' mode is enabled the formerly defined CANFD_EDL bit became
redundant and also confusing as an unset bit would be an error and would
always need to be tested.

This patch removes the obsolete CANFD_EDL bit and clarifies the documentation
for the use of struct canfd_frame and the CAN FD relevant flags.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can.h b/include/linux/can.h
index 018055efc034..e52958d7c2d1 100644
--- a/include/linux/can.h
+++ b/include/linux/can.h
@@ -74,20 +74,21 @@ struct can_frame {
 /*
  * defined bits for canfd_frame.flags
  *
- * As the default for CAN FD should be to support the high data rate in the
- * payload section of the frame (HDR) and to support up to 64 byte in the
- * data section (EDL) the bits are only set in the non-default case.
- * Btw. as long as there's no real implementation for CAN FD network driver
- * these bits are only preliminary.
+ * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to
+ * be set in the CAN frame bitstream on the wire. The EDL bit switch turns
+ * the CAN controllers bitstream processor into the CAN FD mode which creates
+ * two new options within the CAN FD frame specification:
  *
- * RX: NOHDR/NOEDL - info about received CAN FD frame
- *     ESI         - bit from originating CAN controller
- * TX: NOHDR/NOEDL - control per-frame settings if supported by CAN controller
- *     ESI         - bit is set by local CAN controller
+ * Bit Rate Switch - to indicate a second bitrate is/was used for the payload
+ * Error State Indicator - represents the error state of the transmitting node
+ *
+ * As the CANFD_ESI bit is internally generated by the transmitting CAN
+ * controller only the CANFD_BRS bit is relevant for real CAN controllers when
+ * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make
+ * sense for virtual CAN interfaces to test applications with echoed frames.
  */
-#define CANFD_NOHDR 0x01 /* frame without high data rate */
-#define CANFD_NOEDL 0x02 /* frame without extended data length */
-#define CANFD_ESI   0x04 /* error state indicator */
+#define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */
+#define CANFD_ESI 0x02 /* error state indicator of the transmitting node */
 
 /**
  * struct canfd_frame - CAN flexible data rate frame structure
-- 
cgit v1.2.3


From 817fea2df3c24b22f6123dc0106eb063b7132883 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 7 Aug 2012 11:48:33 -0600
Subject: vfio: Include vfio.h in installed headers

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index d9a754474878..fa217607c582 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -391,6 +391,7 @@ header-y += v4l2-dv-timings.h
 header-y += v4l2-mediabus.h
 header-y += v4l2-subdev.h
 header-y += veth.h
+header-y += vfio.h
 header-y += vhost.h
 header-y += videodev2.h
 header-y += virtio_9p.h
-- 
cgit v1.2.3


From 300d3739e873d50d4c6e3656f89007a217fb1d29 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 7 Aug 2012 13:50:22 +0200
Subject: Revert "NMI watchdog: fix for lockup detector breakage on resume"

Revert commit 45226e9 (NMI watchdog: fix for lockup detector breakage
on resume) which breaks resume from system suspend on my SH7372
Mackerel board (by causing a NULL pointer dereference to happen) and
is generally wrong, because it abuses the CPU hotplug functionality
in a shamelessly blatant way.

The original issue should be addressed through appropriate syscore
resume callback instead.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/sched.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c147e7024f11..b8c86648a2f9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -334,14 +334,6 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
-#if defined(CONFIG_LOCKUP_DETECTOR) && defined(CONFIG_SUSPEND)
-void lockup_detector_bootcpu_resume(void);
-#else
-static inline void lockup_detector_bootcpu_resume(void)
-{
-}
-#endif
-
 #ifdef CONFIG_DETECT_HUNG_TASK
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_check_count;
-- 
cgit v1.2.3


From 59ee93a528b94ef4e81a08db252b0326feff171f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 5 Aug 2012 14:58:37 +0000
Subject: ARM: pxa: remove irq_to_gpio from ezx-pcap driver

The irq_to_gpio function was removed from the pxa platform
in linux-3.2, and this driver has been broken since.

There is actually no in-tree user of this driver that adds
this platform device, but the driver can and does get enabled
on some platforms.

Without this patch, building ezx_defconfig results in:

drivers/mfd/ezx-pcap.c: In function 'pcap_isr_work':
drivers/mfd/ezx-pcap.c:205:2: error: implicit declaration of function 'irq_to_gpio' [-Werror=implicit-function-declaration]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: stable@vger.kernel.org (v3.2+)
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Daniel Ribeiro <drwyrm@gmail.com>
---
 include/linux/mfd/ezx-pcap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h
index 40c372165f3e..32a1b5cfeba1 100644
--- a/include/linux/mfd/ezx-pcap.h
+++ b/include/linux/mfd/ezx-pcap.h
@@ -16,6 +16,7 @@ struct pcap_subdev {
 struct pcap_platform_data {
 	unsigned int irq_base;
 	unsigned int config;
+	int gpio;
 	void (*init) (void *);	/* board specific init */
 	int num_subdevs;
 	struct pcap_subdev *subdevs;
-- 
cgit v1.2.3


From 4eef6cbfcc03b294d9d334368a851b35b496ce53 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 30 Apr 2012 16:21:37 +0000
Subject: Input: eeti_ts: pass gpio value instead of IRQ

The EETI touchscreen asserts its IRQ line as soon as it has data in its
internal buffers. The line is automatically deasserted once all data has
been read via I2C. Hence, the driver has to monitor the GPIO line and
cannot simply rely on the interrupt handler reception.

In the current implementation of the driver, irq_to_gpio() is used to
determine the GPIO number from the i2c_client's IRQ value.

As irq_to_gpio() is not available on all platforms, this patch changes
this and makes the driver ignore the passed in IRQ. Instead, a GPIO is
added to the platform_data struct and gpio_to_irq is used to derive the
IRQ from that GPIO. If this fails, bail out. The driver is only able to
work in environments where the touchscreen GPIO can be mapped to an
IRQ.

Without this patch, building raumfeld_defconfig results in:

drivers/input/touchscreen/eeti_ts.c: In function 'eeti_ts_irq_active':
drivers/input/touchscreen/eeti_ts.c:65:2: error: implicit declaration of function 'irq_to_gpio' [-Werror=implicit-function-declaration]

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org (v3.2+)
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Sven Neumann <s.neumann@raumfeld.com>
Cc: linux-input@vger.kernel.org
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
---
 include/linux/input/eeti_ts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h
index f875b316249d..16625d799b6f 100644
--- a/include/linux/input/eeti_ts.h
+++ b/include/linux/input/eeti_ts.h
@@ -2,6 +2,7 @@
 #define LINUX_INPUT_EETI_TS_H
 
 struct eeti_ts_platform_data {
+	int irq_gpio;
 	unsigned int irq_active_high;
 };
 
-- 
cgit v1.2.3


From 276f0f5d157bb4a816053f4f3a941dbcd4f76556 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fusionio.com>
Date: Thu, 9 Aug 2012 15:20:23 +0200
Subject: block: disable discard request merge temporarily

The SCSI discard request merge never worked, and looks no solution
for in future, let's disable it temporarily.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dc632975d54f..4a2ab7c85393 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -601,7 +601,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
  * it already be started by driver.
  */
 #define RQ_NOMERGE_FLAGS	\
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
 	 (((rq)->cmd_flags & REQ_DISCARD) || \
-- 
cgit v1.2.3


From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:46 +0000
Subject: netfilter: nf_ct_sip: fix IPv6 address parsing

Within SIP messages IPv6 addresses are enclosed in square brackets in most
cases, with the exception of the "received=" header parameter. Currently
the helper fails to parse enclosed addresses.

This patch:

- changes the SIP address parsing function to enforce square brackets
  when required, and accept them when not required but present, as
  recommended by RFC 5118.

- adds a new SDP address parsing function that never accepts square
  brackets since SDP doesn't use them.

With these changes, the SIP helper correctly parses all test messages
from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages
for Internet Protocol Version 6 (IPv6)).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 0dfc8b7210a3..89f2a627f3f0 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -164,7 +164,7 @@ extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr
 				      unsigned int dataoff, unsigned int datalen,
 				      const char *name,
 				      unsigned int *matchoff, unsigned int *matchlen,
-				      union nf_inet_addr *addr);
+				      union nf_inet_addr *addr, bool delim);
 extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
 					unsigned int off, unsigned int datalen,
 					const char *name,
-- 
cgit v1.2.3


From 9d8dad742ad1c74d7e7210ee05d0b44961d5ea16 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 9 Aug 2012 19:01:26 -0700
Subject: Yama: higher restrictions should block PTRACE_TRACEME

The higher ptrace restriction levels should be blocking even
PTRACE_TRACEME requests. The comments in the LSM documentation are
misleading about when the checks happen (the parent does not go through
security_ptrace_access_check() on a PTRACE_TRACEME call).

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org # 3.5.x and later
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/security.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 4e5a73cdbbef..3dea6a9d568f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1242,8 +1242,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Check that the @parent process has sufficient permission to trace the
  *	current process before allowing the current process to present itself
  *	to the @parent process for tracing.
- *	The parent process will still have to undergo the ptrace_access_check
- *	checks before it is allowed to trace this one.
  *	@parent contains the task_struct structure for debugger process.
  *	Return 0 if permission is granted.
  * @capget:
-- 
cgit v1.2.3


From ac9dad93164c603e5efc4e57727e929799861a9f Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 6 Jul 2012 09:12:44 -0300
Subject: [media] omap3isp: preview: Merge gamma correction and gamma bypass

Enabling gamma bypass disables gamma correction and vice versa. Merge
the two parameters.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sakari Ailus <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/omap3isp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/omap3isp.h b/include/linux/omap3isp.h
index e7a79db3c1f7..0cddaa9d08bb 100644
--- a/include/linux/omap3isp.h
+++ b/include/linux/omap3isp.h
@@ -428,7 +428,7 @@ struct omap3isp_ccdc_update_config {
 #define OMAP3ISP_PREV_COLOR_CONV	(1 << 8)
 #define OMAP3ISP_PREV_YC_LIMIT		(1 << 9)
 #define OMAP3ISP_PREV_DEFECT_COR	(1 << 10)
-#define OMAP3ISP_PREV_GAMMABYPASS	(1 << 11)
+/* Bit 11 was OMAP3ISP_PREV_GAMMABYPASS, now merged with OMAP3ISP_PREV_GAMMA */
 #define OMAP3ISP_PREV_DRK_FRM_CAPTURE	(1 << 12)
 #define OMAP3ISP_PREV_DRK_FRM_SUBTRACT	(1 << 13)
 #define OMAP3ISP_PREV_LENS_SHADING	(1 << 14)
-- 
cgit v1.2.3


From 6fd206cb6ebd59e42b376b9e2e8f40d90910757e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 18 Jun 2012 11:24:48 -0300
Subject: [media] omap3isp: preview: Add support for non-GRBG Bayer patterns

Rearrange the CFA interpolation coefficients table based on the Bayer
pattern. Support for non-Bayer CFA patterns is dropped as they were not
correctly supported, and have never been tested.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sakari Ailus <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/omap3isp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/omap3isp.h b/include/linux/omap3isp.h
index 0cddaa9d08bb..c090cf9249bb 100644
--- a/include/linux/omap3isp.h
+++ b/include/linux/omap3isp.h
@@ -437,6 +437,7 @@ struct omap3isp_ccdc_update_config {
 
 #define OMAP3ISP_PREV_NF_TBL_SIZE	64
 #define OMAP3ISP_PREV_CFA_TBL_SIZE	576
+#define OMAP3ISP_PREV_CFA_BLK_SIZE	(OMAP3ISP_PREV_CFA_TBL_SIZE / 4)
 #define OMAP3ISP_PREV_GAMMA_TBL_SIZE	1024
 #define OMAP3ISP_PREV_YENH_TBL_SIZE	128
 
@@ -478,7 +479,7 @@ struct omap3isp_prev_cfa {
 	enum omap3isp_cfa_fmt format;
 	__u8 gradthrs_vert;
 	__u8 gradthrs_horz;
-	__u32 table[OMAP3ISP_PREV_CFA_TBL_SIZE];
+	__u32 table[4][OMAP3ISP_PREV_CFA_BLK_SIZE];
 };
 
 /**
-- 
cgit v1.2.3


From 224b6642f5e82a1b21f6b552c799fa02e527d542 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Sun, 12 Aug 2012 22:33:21 -0300
Subject: [media] add DTMB support for DVB API

Cc: Patrick Boettcher <pboettcher@kernellabs.com>
Cc: Andreas Oberritter <obi@linuxtv.org>
Cc: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Patrick Boettcher <pboettcher@kernellabs.com>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 21 ++++++++++++++++++---
 include/linux/dvb/version.h  |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index f50d4058c5fb..2dd5823b59b9 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -152,6 +152,7 @@ typedef enum fe_code_rate {
 	FEC_AUTO,
 	FEC_3_5,
 	FEC_9_10,
+	FEC_2_5,
 } fe_code_rate_t;
 
 
@@ -169,6 +170,7 @@ typedef enum fe_modulation {
 	APSK_16,
 	APSK_32,
 	DQPSK,
+	QAM_4_NR,
 } fe_modulation_t;
 
 typedef enum fe_transmit_mode {
@@ -179,6 +181,8 @@ typedef enum fe_transmit_mode {
 	TRANSMISSION_MODE_1K,
 	TRANSMISSION_MODE_16K,
 	TRANSMISSION_MODE_32K,
+	TRANSMISSION_MODE_C1,
+	TRANSMISSION_MODE_C3780,
 } fe_transmit_mode_t;
 
 #if defined(__DVB_CORE__) || !defined (__KERNEL__)
@@ -202,6 +206,9 @@ typedef enum fe_guard_interval {
 	GUARD_INTERVAL_1_128,
 	GUARD_INTERVAL_19_128,
 	GUARD_INTERVAL_19_256,
+	GUARD_INTERVAL_PN420,
+	GUARD_INTERVAL_PN595,
+	GUARD_INTERVAL_PN945,
 } fe_guard_interval_t;
 
 
@@ -213,6 +220,11 @@ typedef enum fe_hierarchy {
 	HIERARCHY_AUTO
 } fe_hierarchy_t;
 
+enum fe_interleaving {
+	INTERLEAVING_NONE,
+	INTERLEAVING_240,
+	INTERLEAVING_720,
+};
 
 #if defined(__DVB_CORE__) || !defined (__KERNEL__)
 struct dvb_qpsk_parameters {
@@ -337,7 +349,9 @@ struct dvb_frontend_event {
 #define DTV_ATSCMH_SCCC_CODE_MODE_C	58
 #define DTV_ATSCMH_SCCC_CODE_MODE_D	59
 
-#define DTV_MAX_COMMAND				DTV_ATSCMH_SCCC_CODE_MODE_D
+#define DTV_INTERLEAVING			60
+
+#define DTV_MAX_COMMAND				DTV_INTERLEAVING
 
 typedef enum fe_pilot {
 	PILOT_ON,
@@ -366,7 +380,7 @@ typedef enum fe_delivery_system {
 	SYS_ISDBC,
 	SYS_ATSC,
 	SYS_ATSCMH,
-	SYS_DMBTH,
+	SYS_DTMB,
 	SYS_CMMB,
 	SYS_DAB,
 	SYS_DVBT2,
@@ -374,8 +388,9 @@ typedef enum fe_delivery_system {
 	SYS_DVBC_ANNEX_C,
 } fe_delivery_system_t;
 
-
+/* backward compatibility */
 #define SYS_DVBC_ANNEX_AC	SYS_DVBC_ANNEX_A
+#define SYS_DMBTH SYS_DTMB /* DMB-TH is legacy name, use DTMB instead */
 
 /* ATSC-MH */
 
diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h
index 43d9e8d462d4..70c2c7edcc7d 100644
--- a/include/linux/dvb/version.h
+++ b/include/linux/dvb/version.h
@@ -24,6 +24,6 @@
 #define _DVBVERSION_H_
 
 #define DVB_API_VERSION 5
-#define DVB_API_VERSION_MINOR 6
+#define DVB_API_VERSION_MINOR 7
 
 #endif /*_DVBVERSION_H_*/
-- 
cgit v1.2.3


From 8746adda9eec9da9a2c5c2944740163628bd1d68 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Sun, 12 Aug 2012 22:33:22 -0300
Subject: [media] DVB API: add INTERLEAVING_AUTO

After thinking twice, I ended up adding own value for AUTO
interleaving instead of using NONE.

API minor number is not needed to increase as that patch should
be the same Kernel as interleaving parameter is initially added.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 2dd5823b59b9..c92b4d64e013 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -222,6 +222,7 @@ typedef enum fe_hierarchy {
 
 enum fe_interleaving {
 	INTERLEAVING_NONE,
+	INTERLEAVING_AUTO,
 	INTERLEAVING_240,
 	INTERLEAVING_720,
 };
-- 
cgit v1.2.3


From 0d27bbfe81cb087748dc1511683bd3e7335a7da5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 13 Aug 2012 17:03:12 -0300
Subject: [media] frontend.h, Docbook: Improve status documentation

No functional changes. It just improves the description of the frontend
status, using Documentation/kernel-doc-nano-HOWTO.txt for the status
enumeration, and a table inside the DocBook.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index c92b4d64e013..bb51edfc72a2 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -121,16 +121,27 @@ typedef enum fe_sec_mini_cmd {
 } fe_sec_mini_cmd_t;
 
 
+/**
+ * enum fe_status - enumerates the possible frontend status
+ * @FE_HAS_SIGNAL:	found something above the noise level
+ * @FE_HAS_CARRIER:	found a DVB signal
+ * @FE_HAS_VITERBI:	FEC is stable
+ * @FE_HAS_SYNC:	found sync bytes
+ * @FE_HAS_LOCK:	everything's working
+ * @FE_TIMEDOUT:	no lock within the last ~2 seconds
+ * @FE_REINIT:		frontend was reinitialized, application is recommended
+ *			to reset DiSEqC, tone and parameters
+ */
+
 typedef enum fe_status {
-	FE_HAS_SIGNAL	= 0x01,   /* found something above the noise level */
-	FE_HAS_CARRIER	= 0x02,   /* found a DVB signal  */
-	FE_HAS_VITERBI	= 0x04,   /* FEC is stable  */
-	FE_HAS_SYNC	= 0x08,   /* found sync bytes  */
-	FE_HAS_LOCK	= 0x10,   /* everything's working... */
-	FE_TIMEDOUT	= 0x20,   /* no lock within the last ~2 seconds */
-	FE_REINIT	= 0x40    /* frontend was reinitialized,  */
-} fe_status_t;			  /* application is recommended to reset */
-				  /* DiSEqC, tone and parameters */
+	FE_HAS_SIGNAL		= 0x01,
+	FE_HAS_CARRIER		= 0x02,
+	FE_HAS_VITERBI		= 0x04,
+	FE_HAS_SYNC		= 0x08,
+	FE_HAS_LOCK		= 0x10,
+	FE_TIMEDOUT		= 0x20,
+	FE_REINIT		= 0x40,
+} fe_status_t;
 
 typedef enum fe_spectral_inversion {
 	INVERSION_OFF,
-- 
cgit v1.2.3


From f026cfa82f628db24b8cea41b9d6202af104cecb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 14 Aug 2012 09:53:38 -0700
Subject: Revert "x86-64/efi: Use EFI to deal with platform wall clock"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bacef661acdb634170a8faddbc1cf28e8f8b9eee.

This commit has been found to cause serious regressions on a number of
ASUS machines at the least.  We probably need to provide a 1:1 map in
addition to the EFI virtual memory map in order for this to work.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reported-and-bisected-by: Jérôme Carretero <cJ-ko@zougloub.eu>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120805172903.5f8bb24c@zougloub.eu
---
 include/linux/efi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 103adc6d7e3a..ec45ccd8708a 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -503,6 +503,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
+extern unsigned long efi_get_time(void);
+extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
-- 
cgit v1.2.3


From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:37 +0000
Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and
 __netpoll_setup()

slave_enable_netpoll() and __netpoll_setup() may be called
with read_lock() held, so should use GFP_ATOMIC to allocate
memory. Eric suggested to pass gfp flags to __netpoll_setup().

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 include/linux/netpoll.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9db4f33407f..3560d688161e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -953,7 +953,8 @@ struct net_device_ops {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
 	int			(*ndo_netpoll_setup)(struct net_device *dev,
-						     struct netpoll_info *info);
+						     struct netpoll_info *info,
+						     gfp_t gfp);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 28f5389c924b..bf2d51eec0f3 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -43,7 +43,7 @@ struct netpoll_info {
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp);
 int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
-- 
cgit v1.2.3


From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:38 +0000
Subject: netpoll: make __netpoll_cleanup non-block

Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup()
may be called with read_lock() held too, so we should make them
non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index bf2d51eec0f3..907812efb4d9 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -23,6 +23,7 @@ struct netpoll {
 	u8 remote_mac[ETH_ALEN];
 
 	struct list_head rx; /* rx_np list element */
+	struct rcu_head rcu;
 };
 
 struct netpoll_info {
@@ -38,6 +39,7 @@ struct netpoll_info {
 	struct delayed_work tx_work;
 
 	struct netpoll *netpoll;
+	struct rcu_head rcu;
 };
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
@@ -48,6 +50,7 @@ int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
+void __netpoll_free_rcu(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
-- 
cgit v1.2.3


From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:40 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx()

In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(),
this patch fixes it by using the 'npinfo' passed from netpoll_rx()
where it is already dereferenced with rcu_dereference_bh().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 907812efb4d9..5d881c388273 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -52,7 +52,7 @@ void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
 void __netpoll_free_rcu(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
-int __netpoll_rx(struct sk_buff *skb);
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
 static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
@@ -77,7 +77,7 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 
 	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
-	if (npinfo->rx_flags && __netpoll_rx(skb))
+	if (npinfo->rx_flags && __netpoll_rx(skb, npinfo))
 		ret = true;
 	spin_unlock(&npinfo->rx_lock);
 
-- 
cgit v1.2.3


From 91fe4a4b9e490a24f6702dd8afe72d8afab6fcdb Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:41 +0000
Subject: netpoll: use netpoll_rx_on() in netpoll_rx()

The logic of the code is same, just call netpoll_rx_on().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 5d881c388273..2d178baa49df 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,6 +63,13 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 
 #ifdef CONFIG_NETPOLL
+static inline int netpoll_rx_on(struct sk_buff *skb)
+{
+	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
+
+	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
+}
+
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo;
@@ -70,11 +77,11 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	bool ret = false;
 
 	local_irq_save(flags);
-	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
-	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
+	if (!netpoll_rx_on(skb))
 		goto out;
 
+	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb, npinfo))
@@ -86,13 +93,6 @@ out:
 	return ret;
 }
 
-static inline int netpoll_rx_on(struct sk_buff *skb)
-{
-	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
-
-	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
-}
-
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
 	if (!list_empty(&skb->dev->napi_list))
-- 
cgit v1.2.3


From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:42 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()

This patch fixes several problems in the call path of
netpoll_send_skb_on_dev():

1. Disable IRQ's before calling netpoll_send_skb_on_dev().

2. All the callees of netpoll_send_skb_on_dev() should use
   rcu_dereference_bh() to dereference ->npinfo.

3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 2d178baa49df..61aee86cf21d 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -57,7 +57,10 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
 static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
+	unsigned long flags;
+	local_irq_save(flags);
 	netpoll_send_skb_on_dev(np, skb, np->dev);
+	local_irq_restore(flags);
 }
 
 
-- 
cgit v1.2.3


From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:45 +0000
Subject: netpoll: check netpoll tx status on the right device

Although this doesn't matter actually, because netpoll_tx_running()
doesn't use the parameter, the code will be more readable.

For team_dev_queue_xmit() we have to move it down to avoid
compile errors.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_team.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 6960fc1841a7..aa2e167e1ef4 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -96,21 +96,6 @@ static inline void team_netpoll_send_skb(struct team_port *port,
 }
 #endif
 
-static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
-				      struct sk_buff *skb)
-{
-	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
-		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
-	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
-
-	skb->dev = port->dev;
-	if (unlikely(netpoll_tx_running(port->dev))) {
-		team_netpoll_send_skb(port, skb);
-		return 0;
-	}
-	return dev_queue_xmit(skb);
-}
-
 struct team_mode_ops {
 	int (*init)(struct team *team);
 	void (*exit)(struct team *team);
@@ -200,6 +185,21 @@ struct team {
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
+static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
+				      struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
+		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
+	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
+
+	skb->dev = port->dev;
+	if (unlikely(netpoll_tx_running(team->dev))) {
+		team_netpoll_send_skb(port, skb);
+		return 0;
+	}
+	return dev_queue_xmit(skb);
+}
+
 static inline struct hlist_head *team_port_index_hash(struct team *team,
 						      int port_index)
 {
-- 
cgit v1.2.3


From 77ab8a54d9a8dcc4a46484a04133314f33f2aba6 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:46 +0000
Subject: netpoll: convert several functions to bool

These functions are just boolean, let them return
bool instead of int.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 61aee86cf21d..66d5379c305e 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -66,7 +66,7 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 
 #ifdef CONFIG_NETPOLL
-static inline int netpoll_rx_on(struct sk_buff *skb)
+static inline bool netpoll_rx_on(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
@@ -125,7 +125,7 @@ static inline void netpoll_poll_unlock(void *have)
 	}
 }
 
-static inline int netpoll_tx_running(struct net_device *dev)
+static inline bool netpoll_tx_running(struct net_device *dev)
 {
 	return irqs_disabled();
 }
@@ -133,11 +133,11 @@ static inline int netpoll_tx_running(struct net_device *dev)
 #else
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
-	return 0;
+	return false;
 }
-static inline int netpoll_rx_on(struct sk_buff *skb)
+static inline bool netpoll_rx_on(struct sk_buff *skb)
 {
-	return 0;
+	return false;
 }
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
@@ -153,9 +153,9 @@ static inline void netpoll_poll_unlock(void *have)
 static inline void netpoll_netdev_init(struct net_device *dev)
 {
 }
-static inline int netpoll_tx_running(struct net_device *dev)
+static inline bool netpoll_tx_running(struct net_device *dev)
 {
-	return 0;
+	return false;
 }
 #endif
 
-- 
cgit v1.2.3


From 4e8b14526ca7fb046a81c94002c1c43b6fdf0e9b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 8 Aug 2012 15:36:20 -0400
Subject: time: Improve sanity checking of timekeeping inputs

Unexpected behavior could occur if the time is set to a value large
enough to overflow a 64bit ktime_t (which is something larger then the
year 2262).

Also unexpected behavior could occur if large negative offsets are
injected via adjtimex.

So this patch improves the sanity check timekeeping inputs by
improving the timespec_valid() check, and then makes better use of
timespec_valid() to make sure we don't set the time to an invalid
negative value or one that overflows ktime_t.

Note: This does not protect from setting the time close to overflowing
ktime_t and then letting natural accumulation cause the overflow.

Reported-by: CAI Qian <caiqian@redhat.com>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Zhouping Liu <zliu@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1344454580-17031-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ktime.h |  7 -------
 include/linux/time.h  | 22 ++++++++++++++++++++--
 2 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 603bec2913b0..06177ba10a16 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -58,13 +58,6 @@ union ktime {
 
 typedef union ktime ktime_t;		/* Kill this */
 
-#define KTIME_MAX			((s64)~((u64)1 << 63))
-#if (BITS_PER_LONG == 64)
-# define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
-#else
-# define KTIME_SEC_MAX			LONG_MAX
-#endif
-
 /*
  * ktime_t definitions when using the 64-bit scalar representation:
  */
diff --git a/include/linux/time.h b/include/linux/time.h
index c81c5e40fcb5..b0bbd8f0130d 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -107,11 +107,29 @@ static inline struct timespec timespec_sub(struct timespec lhs,
 	return ts_delta;
 }
 
+#define KTIME_MAX			((s64)~((u64)1 << 63))
+#if (BITS_PER_LONG == 64)
+# define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
+#else
+# define KTIME_SEC_MAX			LONG_MAX
+#endif
+
 /*
  * Returns true if the timespec is norm, false if denorm:
  */
-#define timespec_valid(ts) \
-	(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
+static inline bool timespec_valid(const struct timespec *ts)
+{
+	/* Dates before 1970 are bogus */
+	if (ts->tv_sec < 0)
+		return false;
+	/* Can't have more nanoseconds then a second */
+	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+		return false;
+	/* Disallow values that could overflow ktime_t */
+	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+		return false;
+	return true;
+}
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
-- 
cgit v1.2.3


From 58569aee5a1a5dcc25c34a0a2ed9a377874e6b05 Mon Sep 17 00:00:00 2001
From: "Arnaud Patard (Rtp)" <arnaud.patard@rtp-net.org>
Date: Thu, 26 Jul 2012 12:15:46 +0200
Subject: ARM: Orion: Set eth packet size csum offload limit

The mv643xx ethernet controller limits the packet size for the TX
checksum offloading. This patch sets this limits for Kirkwood and
Dove which have smaller limits that the default.

As a side note, this patch is an updated version of a patch sent some years
ago: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017320.html
which seems to have been lost.

Signed-off-by: Arnaud Patard <arnaud.patard@rtp-net.org>
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
Cc: <stable@vger.kernel.org>
---
 include/linux/mv643xx_eth.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 51bf8ada6dc0..49258e0ed1c6 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -15,6 +15,8 @@
 #define MV643XX_ETH_SIZE_REG_4		0x2224
 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG	0x2290
 
+#define MV643XX_TX_CSUM_DEFAULT_LIMIT	0
+
 struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
 	struct platform_device	*shared_smi;
-- 
cgit v1.2.3


From 8857df3aceb7a8eb7558059b7da109e41dd1fb95 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 20 Jul 2012 09:31:00 +0100
Subject: iio: frequency: ADF4350: Fix potential reference div factor overflow.

With small channel spacing values and high reference frequencies it is
possible to exceed the range of the 10-bit counter.
Workaround by checking the range and widening some constrains.

We don't use the REG1_PHASE value in this case the datasheet recommends to set
it to 1 if not used.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/frequency/adf4350.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h
index b76b4a87065e..be91f344d5fc 100644
--- a/include/linux/iio/frequency/adf4350.h
+++ b/include/linux/iio/frequency/adf4350.h
@@ -87,6 +87,8 @@
 #define ADF4350_MAX_BANDSEL_CLK		125000 /* Hz */
 #define ADF4350_MAX_FREQ_REFIN		250000000 /* Hz */
 #define ADF4350_MAX_MODULUS		4095
+#define ADF4350_MAX_R_CNT		1023
+
 
 /**
  * struct adf4350_platform_data - platform specific information
-- 
cgit v1.2.3


From ac5aa7f9e0891a115ab307b4bdde9c55b9232970 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Fri, 10 Aug 2012 16:52:58 +0200
Subject: pinctrl: header: trivial: declare struct device

As struct device is used as a function argument, it should at
least be declared (device.h is not included).

Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/consumer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 6dd96fb45482..e9b7f4350844 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -20,6 +20,7 @@
 /* This struct is private to the core and should be regarded as a cookie */
 struct pinctrl;
 struct pinctrl_state;
+struct device;
 
 #ifdef CONFIG_PINCTRL
 
-- 
cgit v1.2.3


From 3296193d1421c2d6f9e49e181cecfd917f0f5764 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Tue, 14 Aug 2012 13:20:23 +0000
Subject: dt: introduce for_each_available_child_of_node,
 of_get_next_available_child

Macro for_each_child_of_node() makes it easy to iterate over all of the
children for a given device tree node, including those nodes that are
marked as unavailable (i.e. status = "disabled").

Introduce for_each_available_child_of_node(), which is like
for_each_child_of_node(), but it automatically skips unavailable nodes.
This also requires the introduction of helper function
of_get_next_available_child(), which returns the next available child
node.

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 5919ee33f2b7..1b1163225f3b 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -190,10 +190,17 @@ extern struct device_node *of_get_parent(const struct device_node *node);
 extern struct device_node *of_get_next_parent(struct device_node *node);
 extern struct device_node *of_get_next_child(const struct device_node *node,
 					     struct device_node *prev);
+extern struct device_node *of_get_next_available_child(
+	const struct device_node *node, struct device_node *prev);
+
 #define for_each_child_of_node(parent, child) \
 	for (child = of_get_next_child(parent, NULL); child != NULL; \
 	     child = of_get_next_child(parent, child))
 
+#define for_each_available_child_of_node(parent, child) \
+	for (child = of_get_next_available_child(parent, NULL); child != NULL; \
+	     child = of_get_next_available_child(parent, child))
+
 static inline int of_get_child_count(const struct device_node *np)
 {
 	struct device_node *child;
-- 
cgit v1.2.3


From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Thu, 16 Aug 2012 22:02:58 +0000
Subject: af_packet: don't emit packet on orig fanout group

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This result in a loop for software which
generate packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info account.

Reported-by: Aleksandr Kotov <a1k@mail.ru>
Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3560d688161e..59dc05f38247 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1522,6 +1522,8 @@ struct packet_type {
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
+	bool			(*id_match)(struct packet_type *ptype,
+					    struct sock *sk);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
-- 
cgit v1.2.3


From c3a5ce0416b6c172a23bc8a3760d8704d3d1535b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 21 Aug 2012 16:16:00 -0700
Subject: string: do not export memweight() to userspace

Fix the following warning:

  usr/include/linux/string.h:8: userspace cannot reference function or variable defined in the kernel

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Acked-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index ffe0442e18d2..b9178812d9df 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 {
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
-#endif
 
 extern size_t memweight(const void *ptr, size_t bytes);
 
+#endif /* __KERNEL__ */
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3


From c67fe3752abe6ab47639e2f9b836900c3dc3da84 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 21 Aug 2012 16:16:17 -0700
Subject: mm: compaction: Abort async compaction if locks are contended or
 taking too long

Jim Schutt reported a problem that pointed at compaction contending
heavily on locks.  The workload is straight-forward and in his own words;

	The systems in question have 24 SAS drives spread across 3 HBAs,
	running 24 Ceph OSD instances, one per drive.  FWIW these servers
	are dual-socket Intel 5675 Xeons w/48 GB memory.  I've got ~160
	Ceph Linux clients doing dd simultaneously to a Ceph file system
	backed by 12 of these servers.

Early in the test everything looks fine

  procs -------------------memory------------------ ---swap-- -----io---- --system-- -----cpu-------
   r  b       swpd       free       buff      cache   si   so    bi    bo   in   cs  us sy  id wa st
  31 15          0     287216        576   38606628    0    0     2  1158    2   14   1  3  95  0  0
  27 15          0     225288        576   38583384    0    0    18 2222016 203357 134876  11 56  17 15  0
  28 17          0     219256        576   38544736    0    0    11 2305932 203141 146296  11 49  23 17  0
   6 18          0     215596        576   38552872    0    0     7 2363207 215264 166502  12 45  22 20  0
  22 18          0     226984        576   38596404    0    0     3 2445741 223114 179527  12 43  23 22  0

and then it goes to pot

  procs -------------------memory------------------ ---swap-- -----io---- --system-- -----cpu-------
   r  b       swpd       free       buff      cache   si   so    bi    bo   in   cs  us sy  id wa st
  163  8          0     464308        576   36791368    0    0    11 22210  866  536   3 13  79  4  0
  207 14          0     917752        576   36181928    0    0   712 1345376 134598 47367   7 90   1  2  0
  123 12          0     685516        576   36296148    0    0   429 1386615 158494 60077   8 84   5  3  0
  123 12          0     598572        576   36333728    0    0  1107 1233281 147542 62351   7 84   5  4  0
  622  7          0     660768        576   36118264    0    0   557 1345548 151394 59353   7 85   4  3  0
  223 11          0     283960        576   36463868    0    0    46 1107160 121846 33006   6 93   1  1  0

Note that system CPU usage is very high blocks being written out has
dropped by 42%. He analysed this with perf and found

  perf record -g -a sleep 10
  perf report --sort symbol --call-graph fractal,5
    34.63%  [k] _raw_spin_lock_irqsave
            |
            |--97.30%-- isolate_freepages
            |          compaction_alloc
            |          unmap_and_move
            |          migrate_pages
            |          compact_zone
            |          compact_zone_order
            |          try_to_compact_pages
            |          __alloc_pages_direct_compact
            |          __alloc_pages_slowpath
            |          __alloc_pages_nodemask
            |          alloc_pages_vma
            |          do_huge_pmd_anonymous_page
            |          handle_mm_fault
            |          do_page_fault
            |          page_fault
            |          |
            |          |--87.39%-- skb_copy_datagram_iovec
            |          |          tcp_recvmsg
            |          |          inet_recvmsg
            |          |          sock_recvmsg
            |          |          sys_recvfrom
            |          |          system_call
            |          |          __recv
            |          |          |
            |          |           --100.00%-- (nil)
            |          |
            |           --12.61%-- memcpy
             --2.70%-- [...]

There was other data but primarily it is all showing that compaction is
contended heavily on the zone->lock and zone->lru_lock.

commit [b2eef8c0: mm: compaction: minimise the time IRQs are disabled
while isolating pages for migration] noted that it was possible for
migration to hold the lru_lock for an excessive amount of time. Very
broadly speaking this patch expands the concept.

This patch introduces compact_checklock_irqsave() to check if a lock
is contended or the process needs to be scheduled. If either condition
is true then async compaction is aborted and the caller is informed.
The page allocator will fail a THP allocation if compaction failed due
to contention. This patch also introduces compact_trylock_irqsave()
which will acquire the lock only if it is not contended and the process
does not need to schedule.

Reported-by: Jim Schutt <jaschut@sandia.gov>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 133ddcf83397..ef658147e4e8 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
-- 
cgit v1.2.3


From 1922b0f2758badc0b971d1ebbd300bc6635a6aef Mon Sep 17 00:00:00 2001
From: AnilKumar Ch <anilkumar@ti.com>
Date: Mon, 13 Aug 2012 20:36:05 +0530
Subject: mfd: Move tps65217 regulator plat data handling to regulator

Regulator platform data handling was mistakenly added to MFD
driver. So we will see build errors if we compile MFD drivers
without CONFIG_REGULATOR. This patch moves regulator platform
data handling from TPS65217 MFD driver to regulator driver.

This makes MFD driver independent of REGULATOR framework so
build error is fixed if CONFIG_REGULATOR is not set.

drivers/built-in.o: In function `tps65217_probe':
tps65217.c:(.devinit.text+0x13e37): undefined reference
to `of_regulator_match'

This patch also fix allocation size of tps65217 platform data.
Current implementation allocates a struct tps65217_board for each
regulator specified in the device tree. But the structure itself
provides array of regulators so one instance of it is sufficient.

Signed-off-by: AnilKumar Ch <anilkumar@ti.com>
---
 include/linux/mfd/tps65217.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 12c06870829a..7cd83d826ed8 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -22,6 +22,9 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 
+/* TPS chip id list */
+#define TPS65217			0xF0
+
 /* I2C ID for TPS65217 part */
 #define TPS65217_I2C_ID			0x24
 
@@ -248,13 +251,11 @@ struct tps_info {
 struct tps65217 {
 	struct device *dev;
 	struct tps65217_board *pdata;
+	unsigned int id;
 	struct regulator_desc desc[TPS65217_NUM_REGULATOR];
 	struct regulator_dev *rdev[TPS65217_NUM_REGULATOR];
 	struct tps_info *info[TPS65217_NUM_REGULATOR];
 	struct regmap *regmap;
-
-	/* Client devices */
-	struct platform_device *regulator_pdev[TPS65217_NUM_REGULATOR];
 };
 
 static inline struct tps65217 *dev_to_tps65217(struct device *dev)
@@ -262,6 +263,11 @@ static inline struct tps65217 *dev_to_tps65217(struct device *dev)
 	return dev_get_drvdata(dev);
 }
 
+static inline int tps65217_chip_id(struct tps65217 *tps65217)
+{
+	return tps65217->id;
+}
+
 int tps65217_reg_read(struct tps65217 *tps, unsigned int reg,
 					unsigned int *val);
 int tps65217_reg_write(struct tps65217 *tps, unsigned int reg,
-- 
cgit v1.2.3


From 8ad5db8a8ddbe3bd33078863a027011e28f1f4ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 17 Aug 2012 20:10:46 -0400
Subject: introduce kref_put_mutex()

equivalent of
	mutex_lock(mutex);
	if (!kref_put(kref, release))
		mutex_unlock(mutex);

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/kref.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 9c07dcebded7..65af6887872f 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -18,6 +18,7 @@
 #include <linux/bug.h>
 #include <linux/atomic.h>
 #include <linux/kernel.h>
+#include <linux/mutex.h>
 
 struct kref {
 	atomic_t refcount;
@@ -93,4 +94,21 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 {
 	return kref_sub(kref, 1, release);
 }
+
+static inline int kref_put_mutex(struct kref *kref,
+				 void (*release)(struct kref *kref),
+				 struct mutex *lock)
+{
+	WARN_ON(release == NULL);
+        if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) {
+		mutex_lock(lock);
+		if (unlikely(!atomic_dec_and_test(&kref->refcount))) {
+			mutex_unlock(lock);
+			return 0;
+		}
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From c7a9b09b1a4a1fbccb2ec409daec95f9068d77c0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 15 Aug 2012 20:51:54 +0000
Subject: ARM: omap: allow building omap44xx without SMP

The new omap4 cpuidle implementation currently requires
ARCH_NEEDS_CPU_IDLE_COUPLED, which only works on SMP.

This patch makes it possible to build a non-SMP kernel
for that platform. This is not normally desired for
end-users but can be useful for testing.

Without this patch, building rand-0y2jSKT results in:

drivers/cpuidle/coupled.c: In function 'cpuidle_coupled_poke':
drivers/cpuidle/coupled.c:317:3: error: implicit declaration of function '__smp_call_function_single' [-Werror=implicit-function-declaration]

It's not clear if this patch is the best solution for
the problem at hand. I have made sure that we can now
build the kernel in all configurations, but that does
not mean it will actually work on an OMAP44xx.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
---
 include/linux/cpuidle.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 040b13b5c14a..279b1eaa8b73 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -194,6 +194,10 @@ static inline int cpuidle_play_dead(void) {return -ENODEV; }
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
+#else
+static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
+{
+}
 #endif
 
 /******************************
-- 
cgit v1.2.3


From b969afc8b719bbe3f0842a694e6bf5e87f08868f Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Thu, 23 Aug 2012 18:14:54 +0200
Subject: ASoC: atmel-ssc: include linux/io.h for raw io

Include linux/io.h for raw io operations in atmel-scc header.

This fixes the following build error:
  CC [M]  sound/soc/atmel/atmel_ssc_dai.o
sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_interrupt':
sound/soc/atmel/atmel_ssc_dai.c:171: error: implicit declaration of function '__raw_readl'
sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_shutdown':
sound/soc/atmel/atmel_ssc_dai.c:249: error: implicit declaration of function '__raw_writel'

Signed-off-by: Joachim Eastwood <joachim.eastwood@jotron.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/atmel-ssc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h
index 06023393fba9..4eb31752e2b7 100644
--- a/include/linux/atmel-ssc.h
+++ b/include/linux/atmel-ssc.h
@@ -3,6 +3,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/list.h>
+#include <linux/io.h>
 
 struct ssc_device {
 	struct list_head	list;
-- 
cgit v1.2.3


From 5c879d2094946081af934739850c7260e8b25d3c Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalmin@broadcom.com>
Date: Sun, 26 Aug 2012 00:35:45 +0000
Subject: bnx2x: fix 57840_MF pci id

Commit c3def943c7117d42caaed3478731ea7c3c87190e have added support for
new pci ids of the 57840 board, while failing to change the obsolete value
in 'pci_ids.h'.
This patch does so, allowing the probe of such devices.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index fc3526077348..6b4565c440c8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2149,7 +2149,7 @@
 #define PCI_DEVICE_ID_TIGON3_5704S	0x16a8
 #define PCI_DEVICE_ID_NX2_57800_VF	0x16a9
 #define PCI_DEVICE_ID_NX2_5706S		0x16aa
-#define PCI_DEVICE_ID_NX2_57840_MF	0x16ab
+#define PCI_DEVICE_ID_NX2_57840_MF	0x16a4
 #define PCI_DEVICE_ID_NX2_5708S		0x16ac
 #define PCI_DEVICE_ID_NX2_57840_VF	0x16ad
 #define PCI_DEVICE_ID_NX2_57810_MF	0x16ae
-- 
cgit v1.2.3


From cee58483cf56e0ba355fdd97ff5e8925329aa936 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 31 Aug 2012 13:30:06 -0400
Subject: time: Move ktime_t overflow checking into timespec_valid_strict

Andreas Bombe reported that the added ktime_t overflow checking added to
timespec_valid in commit 4e8b14526ca7 ("time: Improve sanity checking of
timekeeping inputs") was causing problems with X.org because it caused
timeouts larger then KTIME_T to be invalid.

Previously, these large timeouts would be clamped to KTIME_MAX and would
never expire, which is valid.

This patch splits the ktime_t overflow checking into a new
timespec_valid_strict function, and converts the timekeeping codes
internal checking to use this more strict function.

Reported-and-tested-by: Andreas Bombe <aeb@debian.org>
Cc: Zhouping Liu <zliu@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index b0bbd8f0130d..b51e664c83e7 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,13 @@ static inline bool timespec_valid(const struct timespec *ts)
 	/* Can't have more nanoseconds then a second */
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return false;
+	return true;
+}
+
+static inline bool timespec_valid_strict(const struct timespec *ts)
+{
+	if (!timespec_valid(ts))
+		return false;
 	/* Disallow values that could overflow ktime_t */
 	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
 		return false;
-- 
cgit v1.2.3


From b6d86d3d6d6e4c9b588d81615c81b5a8292b62ed Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 24 Aug 2012 17:25:01 -0700
Subject: linux/kernel.h: Fix DIV_ROUND_CLOSEST to support negative dividends

DIV_ROUND_CLOSEST returns a bad result for negative dividends:
	DIV_ROUND_CLOSEST(-2, 2) = 0

Most of the time this does not matter. However, in the hardware monitoring
subsystem, DIV_ROUND_CLOSEST is sometimes used on integers which can be
negative (such as temperatures).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/kernel.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 604382143bcf..594b419b7d20 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -82,10 +82,18 @@
 	__x - (__x % (y));				\
 }							\
 )
+
+/*
+ * Divide positive or negative dividend by positive divisor and round
+ * to closest integer. Result is undefined for negative divisors.
+ */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
-	typeof(divisor) __divisor = divisor;		\
-	(((x) + ((__divisor) / 2)) / (__divisor));	\
+	typeof(x) __x = x;				\
+	typeof(divisor) __d = divisor;			\
+	(((typeof(x))-1) >= 0 || (__x) >= 0) ?		\
+		(((__x) + ((__d) / 2)) / (__d)) :	\
+		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
 )
 
-- 
cgit v1.2.3


From a6fa941d94b411bbd2b6421ffbde6db3c93e65ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 20 Aug 2012 14:59:25 +0100
Subject: perf_event: Switch to internal refcount, fix race with close()

Don't mess with file refcounts (or keep a reference to file, for
that matter) in perf_event.  Use explicit refcount of its own
instead.  Deal with the race between the final reference to event
going away and new children getting created for it by use of
atomic_long_inc_not_zero() in inherit_event(); just have the
latter free what it had allocated and return NULL, that works
out just fine (children of siblings of something doomed are
created as singletons, same as if the child of leader had been
created and immediately killed).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120820135925.GG23464@ZenIV.linux.org.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7602ccb3f40e..ad04dfcd6f35 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -926,7 +926,7 @@ struct perf_event {
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
-	struct file			*filp;
+	atomic_long_t			refcount;
 
 	/*
 	 * These accumulate total time (in nanoseconds) that children
-- 
cgit v1.2.3


From 500ad2d8b01390c98bc6dce068bccfa9534b8212 Mon Sep 17 00:00:00 2001
From: "K.Prasad" <Prasad.Krishnan@gmail.com>
Date: Thu, 2 Aug 2012 13:46:35 +0530
Subject: perf/hwpb: Invoke __perf_event_disable() if interrupts are already
 disabled

While debugging a warning message on PowerPC while using hardware
breakpoints, it was discovered that when perf_event_disable is invoked
through hw_breakpoint_handler function with interrupts disabled, a
subsequent IPI in the code path would trigger a WARN_ON_ONCE message in
smp_call_function_single function.

This patch calls __perf_event_disable() when interrupts are already
disabled, instead of perf_event_disable().

Reported-by: Edjunior Barbosa Machado <emachado@linux.vnet.ibm.com>
Signed-off-by: K.Prasad <Prasad.Krishnan@gmail.com>
[naveen.n.rao@linux.vnet.ibm.com: v3: Check to make sure we target current task]
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120802081635.5811.17737.stgit@localhost.localdomain
[ Fixed build error on MIPS. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ad04dfcd6f35..33ed9d605f91 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1296,6 +1296,7 @@ extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
+extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
 #else
 static inline void
@@ -1334,6 +1335,7 @@ static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
+static inline int __perf_event_disable(void *info)			{ return -1; }
 static inline void perf_event_task_tick(void)				{ }
 #endif
 
-- 
cgit v1.2.3


From 3550ccdb9d8d350e526b809bf3dd92b550a74fe1 Mon Sep 17 00:00:00 2001
From: Ian Chen <ian.cy.chen@samsung.com>
Date: Wed, 29 Aug 2012 15:05:36 +0900
Subject: mmc: card: Skip secure erase on MoviNAND; causes unrecoverable
 corruption.

For several MoviNAND eMMC parts, there are known issues with secure
erase and secure trim.  For these specific MoviNAND devices, we skip
these operations.

Specifically, there is a bug in the eMMC firmware that causes
unrecoverable corruption when the MMC is erased with MMC_CAP_ERASE
enabled.

References:

http://forum.xda-developers.com/showthread.php?t=1644364
https://plus.google.com/111398485184813224730/posts/21pTYfTsCkB#111398485184813224730/posts/21pTYfTsCkB

Signed-off-by: Ian Chen <ian.cy.chen@samsung.com>
Reviewed-by: Namjae Jeon <linkinjeon@gmail.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: stable <stable@vger.kernel.org> [3.0+]
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/card.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 111aca5e97f3..4b27f9f503e4 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -239,6 +239,7 @@ struct mmc_card {
 #define MMC_QUIRK_BLK_NO_CMD23	(1<<7)		/* Avoid CMD23 for regular multiblock */
 #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8)	/* Avoid sending 512 bytes in */
 #define MMC_QUIRK_LONG_READ_TIME (1<<9)		/* Data read time > CSD says */
+#define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1<<10)	/* Skip secure for erase/trim */
 						/* byte mode */
 	unsigned int    poweroff_notify_state;	/* eMMC4.5 notify feature */
 #define MMC_NO_POWER_NOTIFICATION	0
-- 
cgit v1.2.3


From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 3 Sep 2012 14:56:02 -0400
Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array

When the NFS_COOKIEVERF helper macro was converted into a static
inline function in commit 99fadcd764 (nfs: convert NFS_*(inode)
helpers to static inline), we broke the initialisation of the
readdir cookies, since that depended on doing a memset with an
argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore
changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *).

At this point, NFS_COOKIEVERF seems to be more of an obfuscation
than a helper, so the best thing would be to just get rid of it.

Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881

Reported-by: Andi Kleen <andi@firstfloor.org>
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
---
 include/linux/nfs_fs.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1f8fc7f9bcd8..4b03f56e280e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -265,11 +265,6 @@ static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
 	return NFS_SERVER(inode)->nfs_client->rpc_ops;
 }
 
-static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
-{
-	return NFS_I(inode)->cookieverf;
-}
-
 static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
 {
 	struct nfs_server *nfss = NFS_SERVER(inode);
-- 
cgit v1.2.3


From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 26 Aug 2012 11:44:43 -0700
Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached

Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached
so that it knows if the acl has been truncated.

The current overflow checking is broken, resulting in Oopses on
user-triggered nfs4_getfacl calls, and is opaque to the point
where several attempts at fixing it have failed.
This patch tries to clean up the code in addition to fixing the
Oopses by ensuring that the overflow checks are performed in
a single place (decode_getacl). If the overflow check failed,
we will still be able to report the acl length, but at least
we will no longer attempt to cache the acl or copy the
truncated contents to user space.

Reported-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: Sachin Prabhu <sprabhu@redhat.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ac7c8ae254f2..be9cf3c7e79e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -652,7 +652,7 @@ struct nfs_getaclargs {
 };
 
 /* getxattr ACL interface flags */
-#define NFS4_ACL_LEN_REQUEST	0x0001	/* zero length getxattr buffer */
+#define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
 	size_t				acl_len;
 	size_t				acl_data_offset;
-- 
cgit v1.2.3


From 60e233a56609fd963c59e99bd75c663d63fa91b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Sun, 2 Sep 2012 15:41:34 +0200
Subject: kobject: fix oops with "input0: bad kobj_uevent_env content in
 show_uevent()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fengguang Wu <fengguang.wu@intel.com> writes:

> After the __devinit* removal series, I can still get kernel panic in
> show_uevent(). So there are more sources of bug..
>
> Debug patch:
>
> @@ -343,8 +343,11 @@ static ssize_t show_uevent(struct device
>                 goto out;
>
>         /* copy keys to file */
> -       for (i = 0; i < env->envp_idx; i++)
> +       dev_err(dev, "uevent %d env[%d]: %s/.../%s\n", env->buflen, env->envp_idx, top_kobj->name, dev->kobj.name);
> +       for (i = 0; i < env->envp_idx; i++) {
> +               printk(KERN_ERR "uevent %d env[%d]: %s\n", (int)count, i, env->envp[i]);
>                 count += sprintf(&buf[count], "%s\n", env->envp[i]);
> +       }
>
> Oops message, the env[] is again not properly initilized:
>
> [   44.068623] input input0: uevent 61 env[805306368]: input0/.../input0
> [   44.069552] uevent 0 env[0]: (null)

This is a completely different CONFIG_HOTPLUG problem, only
demonstrating another reason why CONFIG_HOTPLUG should go away.  I had a
hard time trying to disable it anyway ;-)

The problem this time is lots of code assuming that a call to
add_uevent_var() will guarantee that env->buflen > 0.  This is not true
if CONFIG_HOTPLUG is unset.  So things like this end up overwriting
env->envp_idx because the array index is -1:

	if (add_uevent_var(env, "MODALIAS="))
		return -ENOMEM;
        len = input_print_modalias(&env->buf[env->buflen - 1],
				   sizeof(env->buf) - env->buflen,
				   dev, 0);

Don't know what the best action is, given that there seem to be a *lot*
of this around the kernel.  This patch "fixes" the problem for me, but I
don't know if it can be considered an appropriate fix.

[ It is the correct fix for now, for 3.7 forcing CONFIG_HOTPLUG to
always be on is the longterm fix, but it's too late for 3.6 and older
kernels to resolve this that way - gregkh ]

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index fc615a97e2d3..1e57449395b1 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -224,7 +224,7 @@ static inline int kobject_uevent_env(struct kobject *kobj,
 
 static inline __printf(2, 3)
 int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
-{ return 0; }
+{ return -ENOMEM; }
 
 static inline int kobject_action_type(const char *buf, size_t count,
 				      enum kobject_action *type)
-- 
cgit v1.2.3


From f39c1bfb5a03e2d255451bff05be0d7255298fa4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 7 Sep 2012 11:08:50 -0400
Subject: SUNRPC: Fix a UDP transport regression

Commit 43cedbf0e8dfb9c5610eb7985d5f21263e313802 (SUNRPC: Ensure that
we grab the XPRT_LOCK before calling xprt_alloc_slot) is causing
hangs in the case of NFS over UDP mounts.

Since neither the UDP or the RDMA transport mechanism use dynamic slot
allocation, we can skip grabbing the socket lock for those transports.
Add a new rpc_xprt_op to allow switching between the TCP and UDP/RDMA
case.

Note that the NFSv4.1 back channel assigns the slot directly
through rpc_run_bc_task, so we can ignore that case.

Reported-by: Dick Streefland <dick.streefland@altium.nl>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org [>= 3.1]
---
 include/linux/sunrpc/xprt.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index cff40aa7db62..bf8c49ff7530 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
 	int		(*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+	void		(*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
 	void		(*connect)(struct rpc_task *task);
@@ -281,6 +282,8 @@ void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
+void			xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
+void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
-- 
cgit v1.2.3


From a8edc3bf05a3465726afdf635a820761fae0d50b Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 5 Sep 2012 22:50:48 +0000
Subject: net/mlx4_core: Put Firmware flow steering structures in common header
 files

To allow for usage of the flow steering Firmware structures in more locations over the driver,
such as the resource tracker, move them from mcg.c to common header files.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index bd6c9fcdf2dd..244ba902ab72 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -796,6 +796,8 @@ enum mlx4_net_trans_rule_id {
 	MLX4_NET_TRANS_RULE_NUM, /* should be last */
 };
 
+extern const u16 __sw_id_hw[];
+
 enum mlx4_net_trans_promisc_mode {
 	MLX4_FS_PROMISC_NONE = 0,
 	MLX4_FS_PROMISC_UPLINK,
-- 
cgit v1.2.3


From 7fb40f87c4195ec1728527f30bc744c47a45b366 Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 5 Sep 2012 22:50:49 +0000
Subject: net/mlx4_core: Add security check / enforcement for flow steering
 rules set for VMs

Since VFs may be mapped to VMs which aren't trusted entities,  flow
steering rules attached through the wrapper on behalf of VFs must be
checked to make sure that their L2 specification relate to MAC address
assigned to that VF, and add L2 specification if its missing.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 244ba902ab72..6e1b0f973a03 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -798,6 +798,17 @@ enum mlx4_net_trans_rule_id {
 
 extern const u16 __sw_id_hw[];
 
+static inline int map_hw_to_sw_id(u16 header_id)
+{
+
+	int i;
+	for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
+		if (header_id == __sw_id_hw[i])
+			return i;
+	}
+	return -EINVAL;
+}
+
 enum mlx4_net_trans_promisc_mode {
 	MLX4_FS_PROMISC_NONE = 0,
 	MLX4_FS_PROMISC_UPLINK,
-- 
cgit v1.2.3


From c076ada4e4aaf45e1a31ad6de7c6cce36081e045 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Wed, 8 Aug 2012 09:42:32 +0200
Subject: i2c: pnx: Fix read transactions of >= 2 bytes

On transactions with n>=2 bytes, the controller actually wrongly clocks in n+1
bytes. This is caused by the (wrong) assumption that RFE in the Status Register
is 1 iff there is no byte already ordered (via a dummy TX byte). This lead to
the implementation of synchronized byte ordering, e.g.:

Dummy-TX - RX - Dummy-TX - RX - ...

But since RFE actually stays high after some Dummy-TX, it rather looks like:

Dummy-TX - Dummy-TX - RX - Dummy-TX - RX - (RX)

The last RX byte is clocked in by the bus controller, but ignored by the kernel
when filling the userspace buffer.

This patch fixes the issue by asking for RX via Dummy-TX asynchronously.
Introducing a separate counter for TX bytes.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-pnx.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 1bc74afe7a35..49ed17fdf055 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -22,6 +22,7 @@ struct i2c_pnx_mif {
 	struct timer_list	timer;		/* Timeout */
 	u8 *			buf;		/* Data buffer */
 	int			len;		/* Length of data buffer */
+	int			order;		/* RX Bytes to order via TX */
 };
 
 struct i2c_pnx_algo_data {
-- 
cgit v1.2.3


From bad9ac2d7f878a31cf1ae8c1ee3768077d222bcb Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 25 Jul 2012 19:12:45 +0200
Subject: perf/x86/ibs: Check syscall attribute flags

Current implementation simply ignores attribute flags. Thus, there is
no notification to userland of unsupported features. Check syscall's
attribute flags to let userland know if a feature is supported by the
kernel. This is also needed to distinguish between future kernels what
might support a feature.

Cc: <stable@vger.kernel.org> v3.5..
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120910093018.GO8285@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 33ed9d605f91..bdb41612bfec 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -274,6 +274,8 @@ struct perf_event_attr {
 	__u64	branch_sample_type; /* enum branch_sample_type */
 };
 
+#define perf_flags(attr)	(*(&(attr)->read_format + 1))
+
 /*
  * Ioctls that can be done on a perf event fd:
  */
-- 
cgit v1.2.3


From 4b921eda53366b319602351ff4d7256fafa4bd1b Mon Sep 17 00:00:00 2001
From: Karsten Keil <keil@b1-systems.de>
Date: Thu, 13 Sep 2012 04:36:20 +0000
Subject: mISDN: Fix wrong usage of flush_work_sync while holding locks

It is a bad idea to hold a spinlock and call flush_work_sync.
Move the workqueue cleanup outside the spinlock and use cancel_work_sync,
on closing the channel this seems to be the more correct function.
Remove the never used and constant return value of mISDN_freebchannel.

Signed-off-by: Karsten Keil <keil@b1-systems.de>
Cc: <stable@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mISDNhw.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index d0752eca9b44..9d96d5d4dfed 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -183,7 +183,7 @@ extern int	mISDN_initbchannel(struct bchannel *, unsigned short,
 				   unsigned short);
 extern int	mISDN_freedchannel(struct dchannel *);
 extern void	mISDN_clear_bchannel(struct bchannel *);
-extern int	mISDN_freebchannel(struct bchannel *);
+extern void	mISDN_freebchannel(struct bchannel *);
 extern int	mISDN_ctrl_bchannel(struct bchannel *, struct mISDN_ctrl_req *);
 extern void	queue_ch_frame(struct mISDNchannel *, u_int,
 			int, struct sk_buff *);
-- 
cgit v1.2.3


From 777f4f85b75f1430c85c7d796220c82033b31268 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 5 Jul 2012 10:58:06 -0300
Subject: [media] v4l2 core: add the missing pieces to support
 DVI/HDMI/DisplayPort

These new controls and two new ioctls make it possible to properly support
VGA, DVI-A/D/I, HDMI and DisplayPort connectors. All these controls and the
ioctls are all at the sub-device level. They are meant for V4L2 bridge/platform
drivers or to be accessed on embedded systems through /dev/v4l-subdev* device
nodes.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/v4l2-subdev.h | 10 ++++++++++
 include/linux/videodev2.h   | 23 +++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/v4l2-subdev.h b/include/linux/v4l2-subdev.h
index 8c57ee9872bb..a33c4daadce3 100644
--- a/include/linux/v4l2-subdev.h
+++ b/include/linux/v4l2-subdev.h
@@ -148,6 +148,14 @@ struct v4l2_subdev_selection {
 	__u32 reserved[8];
 };
 
+struct v4l2_subdev_edid {
+	__u32 pad;
+	__u32 start_block;
+	__u32 blocks;
+	__u32 reserved[5];
+	__u8 __user *edid;
+};
+
 #define VIDIOC_SUBDEV_G_FMT	_IOWR('V',  4, struct v4l2_subdev_format)
 #define VIDIOC_SUBDEV_S_FMT	_IOWR('V',  5, struct v4l2_subdev_format)
 #define VIDIOC_SUBDEV_G_FRAME_INTERVAL \
@@ -166,5 +174,7 @@ struct v4l2_subdev_selection {
 	_IOWR('V', 61, struct v4l2_subdev_selection)
 #define VIDIOC_SUBDEV_S_SELECTION \
 	_IOWR('V', 62, struct v4l2_subdev_selection)
+#define VIDIOC_SUBDEV_G_EDID	_IOWR('V', 40, struct v4l2_subdev_edid)
+#define VIDIOC_SUBDEV_S_EDID	_IOWR('V', 41, struct v4l2_subdev_edid)
 
 #endif
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 7a147c8299ab..91939a7a896e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1250,6 +1250,7 @@ struct v4l2_ext_controls {
 #define V4L2_CTRL_CLASS_JPEG 0x009d0000		/* JPEG-compression controls */
 #define V4L2_CTRL_CLASS_IMAGE_SOURCE 0x009e0000	/* Image source controls */
 #define V4L2_CTRL_CLASS_IMAGE_PROC 0x009f0000	/* Image processing controls */
+#define V4L2_CTRL_CLASS_DV 0x00a00000		/* Digital Video controls */
 
 #define V4L2_CTRL_ID_MASK      	  (0x0fffffff)
 #define V4L2_CTRL_ID2CLASS(id)    ((id) & 0x0fff0000UL)
@@ -1993,6 +1994,28 @@ enum v4l2_jpeg_chroma_subsampling {
 #define V4L2_CID_LINK_FREQ			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 1)
 #define V4L2_CID_PIXEL_RATE			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 2)
 
+/*  DV-class control IDs defined by V4L2 */
+#define V4L2_CID_DV_CLASS_BASE			(V4L2_CTRL_CLASS_DV | 0x900)
+#define V4L2_CID_DV_CLASS			(V4L2_CTRL_CLASS_DV | 1)
+
+#define	V4L2_CID_DV_TX_HOTPLUG			(V4L2_CID_DV_CLASS_BASE + 1)
+#define	V4L2_CID_DV_TX_RXSENSE			(V4L2_CID_DV_CLASS_BASE + 2)
+#define	V4L2_CID_DV_TX_EDID_PRESENT		(V4L2_CID_DV_CLASS_BASE + 3)
+#define	V4L2_CID_DV_TX_MODE			(V4L2_CID_DV_CLASS_BASE + 4)
+enum v4l2_dv_tx_mode {
+	V4L2_DV_TX_MODE_DVI_D	= 0,
+	V4L2_DV_TX_MODE_HDMI	= 1,
+};
+#define V4L2_CID_DV_TX_RGB_RANGE		(V4L2_CID_DV_CLASS_BASE + 5)
+enum v4l2_dv_rgb_range {
+	V4L2_DV_RGB_RANGE_AUTO	  = 0,
+	V4L2_DV_RGB_RANGE_LIMITED = 1,
+	V4L2_DV_RGB_RANGE_FULL	  = 2,
+};
+
+#define	V4L2_CID_DV_RX_POWER_PRESENT		(V4L2_CID_DV_CLASS_BASE + 100)
+#define V4L2_CID_DV_RX_RGB_RANGE		(V4L2_CID_DV_CLASS_BASE + 101)
+
 /*
  *	T U N I N G
  */
-- 
cgit v1.2.3


From 8ab023812b2b5e81bb1876a925541bceaf62538a Mon Sep 17 00:00:00 2001
From: Shaik Ameer Basha <shaik.ameer@samsung.com>
Date: Tue, 31 Jul 2012 10:44:02 -0300
Subject: [media] v4l: Add new YVU420 multi planar fourcc definition

This patch adds new 'YM21' fourcc definition for multiplanar YCrCb
pixel format - V4L2_PIX_FMT_YVU420M.

Signed-off-by: Shaik Ameer Basha <shaik.ameer@samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 91939a7a896e..4862165e195e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -368,6 +368,7 @@ struct v4l2_pix_format {
 
 /* three non contiguous planes - Y, Cb, Cr */
 #define V4L2_PIX_FMT_YUV420M v4l2_fourcc('Y', 'M', '1', '2') /* 12  YUV420 planar */
+#define V4L2_PIX_FMT_YVU420M v4l2_fourcc('Y', 'M', '2', '1') /* 12  YVU420 planar */
 
 /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */
 #define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
-- 
cgit v1.2.3


From 0848c94fb4a5cc213a7fb0fb3a5721ad6e16f096 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 11 Sep 2012 15:16:36 +0800
Subject: mfd: core: Push irqdomain mapping out into devices

Currently the MFD core supports remapping MFD cell interrupts using an
irqdomain but only if the MFD is being instantiated using device tree
and only if the device tree bindings use the pattern of registering IPs
in the device tree with compatible properties.  This will be actively
harmful for drivers which support non-DT platforms and use this pattern
for their DT bindings as it will mean that the core will silently change
remapping behaviour and it is also limiting for drivers which don't do
DT with this particular pattern.  There is also a potential fragility if
there are interrupts not associated with MFD cells and all the cells are
omitted from the device tree for some reason.

Instead change the code to take an IRQ domain as an optional argument,
allowing drivers to take the decision about the parent domain for their
interrupts.  The one current user of this feature is ab8500-core, it has
the domain lookup pushed out into the driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/core.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 3a8435a8058f..cebe97ee98b8 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -16,6 +16,8 @@
 
 #include <linux/platform_device.h>
 
+struct irq_domain;
+
 /*
  * This struct describes the MFD part ("cell").
  * After registration the copy of this structure will become the platform data
@@ -98,7 +100,7 @@ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
 extern int mfd_add_devices(struct device *parent, int id,
 			   struct mfd_cell *cells, int n_devs,
 			   struct resource *mem_base,
-			   int irq_base);
+			   int irq_base, struct irq_domain *irq_domain);
 
 extern void mfd_remove_devices(struct device *parent);
 
-- 
cgit v1.2.3


From 37407ea7f93864c2cfc03edf8f37872ec539ea2b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 Sep 2012 12:29:43 -0700
Subject: Revert "sched: Improve scalability via 'CPU buddies', which withstand
 random perturbations"

This reverts commit 970e178985cadbca660feb02f4d2ee3a09f7fdda.

Nikolay Ulyanitsky reported thatthe 3.6-rc5 kernel has a 15-20%
performance drop on PostgreSQL 9.2 on his machine (running "pgbench").

Borislav Petkov was able to reproduce this, and bisected it to this
commit 970e178985ca ("sched: Improve scalability via 'CPU buddies' ...")
apparently because the new single-idle-buddy model simply doesn't find
idle CPU's to reschedule on aggressively enough.

Mike Galbraith suspects that it is likely due to the user-mode spinlocks
in PostgreSQL not reacting well to preemption, but we don't really know
the details - I'll just revert the commit for now.

There are hopefully other approaches to improve scheduler scalability
without it causing these kinds of downsides.

Reported-by: Nikolay Ulyanitsky <lystor@gmail.com>
Bisected-by: Borislav Petkov <bp@alien8.de>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8c86648a2f9..23bddac4bad8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -954,7 +954,6 @@ struct sched_domain {
 	unsigned int smt_gain;
 	int flags;			/* See SD_* */
 	int level;
-	int idle_buddy;			/* cpu assigned to select_idle_sibling() */
 
 	/* Runtime fields. */
 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
-- 
cgit v1.2.3


From 05cf96398e1b6502f9e191291b715c7463c9d5dd Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@huawei.com>
Date: Mon, 17 Sep 2012 14:08:56 -0700
Subject: mm/ia64: fix a memory block size bug

I found following definition in include/linux/memory.h, in my IA64
platform, SECTION_SIZE_BITS is equal to 32, and MIN_MEMORY_BLOCK_SIZE
will be 0.

  #define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)

Because MIN_MEMORY_BLOCK_SIZE is int type and length of 32bits,
so MIN_MEMORY_BLOCK_SIZE(1 << 32) will will equal to 0.
Actually when SECTION_SIZE_BITS >= 31, MIN_MEMORY_BLOCK_SIZE will be wrong.
This will cause wrong system memory infomation in sysfs.
I think it should be:

  #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)

And "echo offline > memory0/state" will cause following call trace:

  kernel BUG at mm/memory_hotplug.c:885!
  sh[6455]: bugcheck! 0 [1]
  Pid: 6455, CPU 0, comm:                   sh
  psr : 0000101008526030 ifs : 8000000000000fa4 ip  : [<a0000001008c40f0>]    Not tainted (3.6.0-rc1)
  ip is at offline_pages+0x210/0xee0
  Call Trace:
    show_stack+0x80/0xa0
    show_regs+0x640/0x920
    die+0x190/0x2c0
    die_if_kernel+0x50/0x80
    ia64_bad_break+0x3d0/0x6e0
    ia64_native_leave_kernel+0x0/0x270
    offline_pages+0x210/0xee0
    alloc_pages_current+0x180/0x2a0

Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 1ac7f6e405f9..ff9a9f8e0ed9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -19,7 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/mutex.h>
 
-#define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)
+#define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
 
 struct memory_block {
 	unsigned long start_section_nr;
-- 
cgit v1.2.3


From 9a858dc7cebce01a7bb616bebb85087fa2b40871 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 17 Sep 2012 14:09:15 -0700
Subject: compiler.h: add __visible

gcc 4.6+ has support for a externally_visible attribute that prevents the
optimizer from optimizing unused symbols away.  Add a __visible macro to
use it with that compiler version or later.

This is used (at least) by the "Link Time Optimization" patchset.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 7 +++++++
 include/linux/compiler.h      | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 2f4079175afb..934bc34d5f99 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -49,6 +49,13 @@
 #endif
 #endif
 
+#if __GNUC_MINOR__ >= 6
+/*
+ * Tell the optimizer that something else uses this function or variable.
+ */
+#define __visible __attribute__((externally_visible))
+#endif
+
 #if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 923d093c9cea..f430e4162f41 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -278,6 +278,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+#ifndef __visible
+#define __visible
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-- 
cgit v1.2.3


From b161dfa6937ae46d50adce8a7c6b12233e96e7bd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 17 Sep 2012 22:31:38 +0200
Subject: vfs: dcache: use DCACHE_DENTRY_KILLED instead of DCACHE_DISCONNECTED
 in d_kill()

IBM reported a soft lockup after applying the fix for the rename_lock
deadlock.  Commit c83ce989cb5f ("VFS: Fix the nfs sillyrename regression
in kernel 2.6.38") was found to be the culprit.

The nfs sillyrename fix used DCACHE_DISCONNECTED to indicate that the
dentry was killed.  This flag can be set on non-killed dentries too,
which results in infinite retries when trying to traverse the dentry
tree.

This patch introduces a separate flag: DCACHE_DENTRY_KILLED, which is
only set in d_kill() and makes try_to_ascend() test only this flag.

IBM reported successful test results with this patch.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index caa34e50537e..59200795482e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -206,6 +206,8 @@ struct dentry_operations {
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
+#define DCACHE_DENTRY_KILLED	0x100000
+
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)
-- 
cgit v1.2.3


From 263a523d18bca306016d75f5c8d5c57c37fe52fb Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 18 Sep 2012 20:42:31 -0700
Subject: linux/kernel.h: Fix warning seen with W=1 due to change in
 DIV_ROUND_CLOSEST

After commit b6d86d3d (Fix DIV_ROUND_CLOSEST to support negative dividends),
the following warning is seen if the kernel is compiled with W=1 (-Wextra):

warning: comparison of unsigned expression >= 0 is always true

The warning is due to the test '((typeof(x))-1) >= 0', which is used to detect
if the variable type is unsigned. Research on the web suggests that the warning
disappears if '>' instead of '>=' is used for the comparison.

Tests after changing the macro along that line show that the warning is gone,
and that the result is still correct:

i=-4: DIV_ROUND_CLOSEST(i, 2)=-2
i=-3: DIV_ROUND_CLOSEST(i, 2)=-2
i=-2: DIV_ROUND_CLOSEST(i, 2)=-1
i=-1: DIV_ROUND_CLOSEST(i, 2)=-1
i=0: DIV_ROUND_CLOSEST(i, 2)=0
i=1: DIV_ROUND_CLOSEST(i, 2)=1
i=2: DIV_ROUND_CLOSEST(i, 2)=1
i=3: DIV_ROUND_CLOSEST(i, 2)=2
i=4: DIV_ROUND_CLOSEST(i, 2)=2

Code size is the same as before.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 594b419b7d20..2451f1f7a1d9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -91,7 +91,7 @@
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) >= 0 || (__x) >= 0) ?		\
+	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3


From ecd7918745234e423dd87fcc0c077da557909720 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 20 Sep 2012 10:01:49 +0000
Subject: xfrm_user: ensure user supplied esn replay window is valid

The current code fails to ensure that the netlink message actually
contains as many bytes as the header indicates. If a user creates a new
state or updates an existing one but does not supply the bytes for the
whole ESN replay window, the kernel copies random heap bytes into the
replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL
netlink attribute. This leads to following issues:

1. The replay window has random bits set confusing the replay handling
   code later on.

2. A malicious user could use this flaw to leak up to ~3.5kB of heap
   memory when she has access to the XFRM netlink interface (requires
   CAP_NET_ADMIN).

Known users of the ESN replay window are strongSwan and Steffen's
iproute2 patch (<http://patchwork.ozlabs.org/patch/85962/>). The latter
uses the interface with a bitmap supplied while the former does not.
strongSwan is therefore prone to run into issue 1.

To fix both issues without breaking existing userland allow using the
XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a
fully specified one. For the former case we initialize the in-kernel
bitmap with zero, for the latter we copy the user supplied bitmap. For
state updates the full bitmap must be supplied.

To prevent overflows in the bitmap length calculation the maximum size
of bmp_len is limited to 128 by this patch -- resulting in a maximum
replay window of 4096 packets. This should be sufficient for all real
life scenarios (RFC 4303 recommends a default replay window size of 64).

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Martin Willi <martin@revosec.ch>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 22e61fdf75a2..28e493b5b94c 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -84,6 +84,8 @@ struct xfrm_replay_state {
 	__u32	bitmap;
 };
 
+#define XFRMA_REPLAY_ESN_MAX	4096
+
 struct xfrm_replay_state_esn {
 	unsigned int	bmp_len;
 	__u32		oseq;
-- 
cgit v1.2.3


From 287cefd096b124874dc4d6d155f53547c0654860 Mon Sep 17 00:00:00 2001
From: Evgeny Plehov <EvgenyPlehov@ukr.net>
Date: Thu, 13 Sep 2012 10:13:30 -0300
Subject: [media] dvb_frontend: add multistream support

Unify multistream support at the DVBAPI: several delivery systems
allow it. Yet, each one had its own name. So, instead of adding
a third version of this field, remove the per-standard naming,
unifying it into a common name.

The legacy code number can still be used by old applications.

Version increased to 5.8.

[mchehab@redhat.com: joined the va1j5jf007s patch, in order to
 avoid compilation breakage]
Signed-off-by: Evgeny Plehov <EvgenyPlehov@ukr.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 8 +++++---
 include/linux/dvb/version.h  | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index bb51edfc72a2..57e2b1763109 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,6 +62,7 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
+	FE_CAN_MULTISTREAM		= 0x4000000,  /* frontend supports multistream filtering */
 	FE_CAN_TURBO_FEC		= 0x8000000,  /* frontend supports "turbo fec modulation" */
 	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
 	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
@@ -338,9 +339,9 @@ struct dvb_frontend_event {
 
 #define DTV_ISDBT_LAYER_ENABLED	41
 
-#define DTV_ISDBS_TS_ID		42
-
-#define DTV_DVBT2_PLP_ID	43
+#define DTV_STREAM_ID		42
+#define DTV_ISDBS_TS_ID_LEGACY	DTV_STREAM_ID
+#define DTV_DVBT2_PLP_ID_LEGACY	43
 
 #define DTV_ENUM_DELSYS		44
 
@@ -436,6 +437,7 @@ enum atscmh_rs_code_mode {
 	ATSCMH_RSCODE_RES        = 3,
 };
 
+#define NO_STREAM_ID_FILTER	(~0U)
 
 struct dtv_cmds_h {
 	char	*name;		/* A display name for debugging purposes */
diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h
index 70c2c7edcc7d..20e5eac2ffd3 100644
--- a/include/linux/dvb/version.h
+++ b/include/linux/dvb/version.h
@@ -24,6 +24,6 @@
 #define _DVBVERSION_H_
 
 #define DVB_API_VERSION 5
-#define DVB_API_VERSION_MINOR 7
+#define DVB_API_VERSION_MINOR 8
 
 #endif /*_DVBVERSION_H_*/
-- 
cgit v1.2.3


From 212ea99a85d30dbc834888384c57ad5abbc67a0a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 23 Sep 2012 16:58:49 +0000
Subject: phy/micrel: Implement support for KSZ8021

The KSZ8021 PHY was previously caught by KS8051, which is not correct.
This PHY needs additional setup if it is strapped for address 0. In such
case an reserved bit must be written in the 0x16, "Operation Mode Strap
Override" register. According to the KS8051 datasheet, that bit means
"PHY Address 0 in non-broadcast" and it indeed behaves as such on KSZ8021.
The issue where the ethernet controller (Freescale FEC) did not communicate
with network is fixed by writing this bit as 1.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David J. Choi <david.choi@micrel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 61f0905bdc48..be7f366e37f0 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -5,6 +5,7 @@
 
 #define PHY_ID_KSZ9021		0x00221610
 #define PHY_ID_KS8737		0x00221720
+#define PHY_ID_KSZ8021		0x00221555
 #define PHY_ID_KS8041		0x00221510
 #define PHY_ID_KS8051		0x00221550
 /* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */
-- 
cgit v1.2.3


From 510d573fefed904a136b220b259bf48d99d5e381 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 23 Sep 2012 16:58:50 +0000
Subject: phy/micrel: Rename KS80xx to KSZ80xx

There is no such part as KS8001, KS8041 or KS8051. There are only
KSZ8001, KSZ8041 and KSZ8051. Rename these parts as such to match
the Micrel naming.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David J. Choi <david.choi@micrel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Cc: Linux ARM kernel <linux-arm-kernel@lists.infradead.org>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index be7f366e37f0..d02355d1fc31 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -6,10 +6,10 @@
 #define PHY_ID_KSZ9021		0x00221610
 #define PHY_ID_KS8737		0x00221720
 #define PHY_ID_KSZ8021		0x00221555
-#define PHY_ID_KS8041		0x00221510
-#define PHY_ID_KS8051		0x00221550
+#define PHY_ID_KSZ8041		0x00221510
+#define PHY_ID_KSZ8051		0x00221550
 /* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */
-#define PHY_ID_KS8001		0x0022161A
+#define PHY_ID_KSZ8001		0x0022161A
 
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
-- 
cgit v1.2.3


From 69190e67d4fac49e2f6ea22f46780d0c712214b8 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 23 Sep 2012 16:58:51 +0000
Subject: phy/micrel: Add missing header to micrel_phy.h

The license header was missing in micrel_phy.h . This patch adds
one.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David J. Choi <david.choi@micrel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index d02355d1fc31..de201203bc7c 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -1,3 +1,15 @@
+/*
+ * include/linux/micrel_phy.h
+ *
+ * Micrel PHY IDs
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
 #ifndef _MICREL_PHY_H
 #define _MICREL_PHY_H
 
-- 
cgit v1.2.3


From bef83de5a0e3031ff6ff9584b458611a7ac01b85 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Mon, 24 Sep 2012 21:23:25 -0600
Subject: iommu: static inline iommu group stub functions

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7e83370e6fd2..f3b99e1c1042 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -256,72 +256,78 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
-int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
+static inline int iommu_attach_group(struct iommu_domain *domain,
+				     struct iommu_group *group)
 {
 	return -ENODEV;
 }
 
-void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
+static inline void iommu_detach_group(struct iommu_domain *domain,
+				      struct iommu_group *group)
 {
 }
 
-struct iommu_group *iommu_group_alloc(void)
+static inline struct iommu_group *iommu_group_alloc(void)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-void *iommu_group_get_iommudata(struct iommu_group *group)
+static inline void *iommu_group_get_iommudata(struct iommu_group *group)
 {
 	return NULL;
 }
 
-void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
-			       void (*release)(void *iommu_data))
+static inline void iommu_group_set_iommudata(struct iommu_group *group,
+					     void *iommu_data,
+					     void (*release)(void *iommu_data))
 {
 }
 
-int iommu_group_set_name(struct iommu_group *group, const char *name)
+static inline int iommu_group_set_name(struct iommu_group *group,
+				       const char *name)
 {
 	return -ENODEV;
 }
 
-int iommu_group_add_device(struct iommu_group *group, struct device *dev)
+static inline int iommu_group_add_device(struct iommu_group *group,
+					 struct device *dev)
 {
 	return -ENODEV;
 }
 
-void iommu_group_remove_device(struct device *dev)
+static inline void iommu_group_remove_device(struct device *dev)
 {
 }
 
-int iommu_group_for_each_dev(struct iommu_group *group, void *data,
-			     int (*fn)(struct device *, void *))
+static inline int iommu_group_for_each_dev(struct iommu_group *group,
+					   void *data,
+					   int (*fn)(struct device *, void *))
 {
 	return -ENODEV;
 }
 
-struct iommu_group *iommu_group_get(struct device *dev)
+static inline struct iommu_group *iommu_group_get(struct device *dev)
 {
 	return NULL;
 }
 
-void iommu_group_put(struct iommu_group *group)
+static inline void iommu_group_put(struct iommu_group *group)
 {
 }
 
-int iommu_group_register_notifier(struct iommu_group *group,
-				  struct notifier_block *nb)
+static inline int iommu_group_register_notifier(struct iommu_group *group,
+						struct notifier_block *nb)
 {
 	return -ENODEV;
 }
 
-int iommu_group_unregister_notifier(struct iommu_group *group,
-				    struct notifier_block *nb)
+static inline int iommu_group_unregister_notifier(struct iommu_group *group,
+						  struct notifier_block *nb)
 {
 	return 0;
 }
 
-int iommu_group_id(struct iommu_group *group)
+static inline int iommu_group_id(struct iommu_group *group)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From be8cfc4af15cf611dfeb66a1fb5df43d5f1e280a Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Mon, 24 Sep 2012 17:17:38 -0700
Subject: c/r: prctl: fix build error for no-MMU case

Commit 1ad75b9e1628 ("c/r: prctl: add minimal address test to
PR_SET_MM") added some address checking to prctl_set_mm() used by
checkpoint-restore.  This causes a build error for no-MMU systems:

   kernel/sys.c: In function 'prctl_set_mm':
   kernel/sys.c:1868:34: error: 'mmap_min_addr' undeclared (first use in this function)

The test for mmap_min_addr doesn't make a lot of sense for no-MMU code
as noted in commit 6e1415467614 ("NOMMU: Optimise away the
{dac_,}mmap_min_addr tests").

This patch defines mmap_min_addr as 0UL in the no-MMU case so that the
compiler will optimize away tests for "addr < mmap_min_addr".

Signed-off-by: Mark Salter <msalter@redhat.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: <stable@vger.kernel.org>	[3.6.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/security.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 3dea6a9d568f..d143b8e01954 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -118,6 +118,7 @@ void reset_security_ops(void);
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
 #else
+#define mmap_min_addr		0UL
 #define dac_mmap_min_addr	0UL
 #endif
 
-- 
cgit v1.2.3


From 8e19b3475072682312de71f4dddf6d4f4081e143 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 3 Sep 2012 09:05:10 -0300
Subject: [media] videodev2.h: split off controls into v4l2-controls.h

During the 2012 Media Workshop it was decided to split off the control
definitions into their own v4l2-controls.h header, included by videodev2.h.
Because controls make up such a large part of V4L2 they made it hard
to read videodev2.h. Splitting off the control definitions makes life
easier.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/Kbuild          |   1 +
 include/linux/v4l2-controls.h | 761 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/videodev2.h     | 697 +-------------------------------------
 3 files changed, 764 insertions(+), 695 deletions(-)
 create mode 100644 include/linux/v4l2-controls.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index fa217607c582..e53840d941d4 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -387,6 +387,7 @@ header-y += utsname.h
 header-y += uuid.h
 header-y += uvcvideo.h
 header-y += v4l2-common.h
+header-y += v4l2-controls.h
 header-y += v4l2-dv-timings.h
 header-y += v4l2-mediabus.h
 header-y += v4l2-subdev.h
diff --git a/include/linux/v4l2-controls.h b/include/linux/v4l2-controls.h
new file mode 100644
index 000000000000..421d24c7f686
--- /dev/null
+++ b/include/linux/v4l2-controls.h
@@ -0,0 +1,761 @@
+/*
+ *  Video for Linux Two controls header file
+ *
+ *  Copyright (C) 1999-2012 the contributors
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  Alternatively you can redistribute this file under the terms of the
+ *  BSD license as stated below:
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *  3. The names of its contributors may not be used to endorse or promote
+ *     products derived from this software without specific prior written
+ *     permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ *  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  The contents of this header was split off from videodev2.h. All control
+ *  definitions should be added to this header, which is included by
+ *  videodev2.h.
+ */
+
+#ifndef __LINUX_V4L2_CONTROLS_H
+#define __LINUX_V4L2_CONTROLS_H
+
+/* Control classes */
+#define V4L2_CTRL_CLASS_USER		0x00980000	/* Old-style 'user' controls */
+#define V4L2_CTRL_CLASS_MPEG		0x00990000	/* MPEG-compression controls */
+#define V4L2_CTRL_CLASS_CAMERA		0x009a0000	/* Camera class controls */
+#define V4L2_CTRL_CLASS_FM_TX		0x009b0000	/* FM Modulator control class */
+#define V4L2_CTRL_CLASS_FLASH		0x009c0000	/* Camera flash controls */
+#define V4L2_CTRL_CLASS_JPEG		0x009d0000	/* JPEG-compression controls */
+#define V4L2_CTRL_CLASS_IMAGE_SOURCE	0x009e0000	/* Image source controls */
+#define V4L2_CTRL_CLASS_IMAGE_PROC	0x009f0000	/* Image processing controls */
+#define V4L2_CTRL_CLASS_DV		0x00a00000	/* Digital Video controls */
+
+/* User-class control IDs */
+
+#define V4L2_CID_BASE			(V4L2_CTRL_CLASS_USER | 0x900)
+#define V4L2_CID_USER_BASE 		V4L2_CID_BASE
+#define V4L2_CID_USER_CLASS 		(V4L2_CTRL_CLASS_USER | 1)
+#define V4L2_CID_BRIGHTNESS		(V4L2_CID_BASE+0)
+#define V4L2_CID_CONTRAST		(V4L2_CID_BASE+1)
+#define V4L2_CID_SATURATION		(V4L2_CID_BASE+2)
+#define V4L2_CID_HUE			(V4L2_CID_BASE+3)
+#define V4L2_CID_AUDIO_VOLUME		(V4L2_CID_BASE+5)
+#define V4L2_CID_AUDIO_BALANCE		(V4L2_CID_BASE+6)
+#define V4L2_CID_AUDIO_BASS		(V4L2_CID_BASE+7)
+#define V4L2_CID_AUDIO_TREBLE		(V4L2_CID_BASE+8)
+#define V4L2_CID_AUDIO_MUTE		(V4L2_CID_BASE+9)
+#define V4L2_CID_AUDIO_LOUDNESS		(V4L2_CID_BASE+10)
+#define V4L2_CID_BLACK_LEVEL		(V4L2_CID_BASE+11) /* Deprecated */
+#define V4L2_CID_AUTO_WHITE_BALANCE	(V4L2_CID_BASE+12)
+#define V4L2_CID_DO_WHITE_BALANCE	(V4L2_CID_BASE+13)
+#define V4L2_CID_RED_BALANCE		(V4L2_CID_BASE+14)
+#define V4L2_CID_BLUE_BALANCE		(V4L2_CID_BASE+15)
+#define V4L2_CID_GAMMA			(V4L2_CID_BASE+16)
+#define V4L2_CID_WHITENESS		(V4L2_CID_GAMMA) /* Deprecated */
+#define V4L2_CID_EXPOSURE		(V4L2_CID_BASE+17)
+#define V4L2_CID_AUTOGAIN		(V4L2_CID_BASE+18)
+#define V4L2_CID_GAIN			(V4L2_CID_BASE+19)
+#define V4L2_CID_HFLIP			(V4L2_CID_BASE+20)
+#define V4L2_CID_VFLIP			(V4L2_CID_BASE+21)
+
+/* Deprecated; use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */
+#define V4L2_CID_HCENTER		(V4L2_CID_BASE+22)
+#define V4L2_CID_VCENTER		(V4L2_CID_BASE+23)
+
+#define V4L2_CID_POWER_LINE_FREQUENCY	(V4L2_CID_BASE+24)
+enum v4l2_power_line_frequency {
+	V4L2_CID_POWER_LINE_FREQUENCY_DISABLED	= 0,
+	V4L2_CID_POWER_LINE_FREQUENCY_50HZ	= 1,
+	V4L2_CID_POWER_LINE_FREQUENCY_60HZ	= 2,
+	V4L2_CID_POWER_LINE_FREQUENCY_AUTO	= 3,
+};
+#define V4L2_CID_HUE_AUTO			(V4L2_CID_BASE+25)
+#define V4L2_CID_WHITE_BALANCE_TEMPERATURE	(V4L2_CID_BASE+26)
+#define V4L2_CID_SHARPNESS			(V4L2_CID_BASE+27)
+#define V4L2_CID_BACKLIGHT_COMPENSATION 	(V4L2_CID_BASE+28)
+#define V4L2_CID_CHROMA_AGC                     (V4L2_CID_BASE+29)
+#define V4L2_CID_COLOR_KILLER                   (V4L2_CID_BASE+30)
+#define V4L2_CID_COLORFX			(V4L2_CID_BASE+31)
+enum v4l2_colorfx {
+	V4L2_COLORFX_NONE			= 0,
+	V4L2_COLORFX_BW				= 1,
+	V4L2_COLORFX_SEPIA			= 2,
+	V4L2_COLORFX_NEGATIVE			= 3,
+	V4L2_COLORFX_EMBOSS			= 4,
+	V4L2_COLORFX_SKETCH			= 5,
+	V4L2_COLORFX_SKY_BLUE			= 6,
+	V4L2_COLORFX_GRASS_GREEN		= 7,
+	V4L2_COLORFX_SKIN_WHITEN		= 8,
+	V4L2_COLORFX_VIVID			= 9,
+	V4L2_COLORFX_AQUA			= 10,
+	V4L2_COLORFX_ART_FREEZE			= 11,
+	V4L2_COLORFX_SILHOUETTE			= 12,
+	V4L2_COLORFX_SOLARIZATION		= 13,
+	V4L2_COLORFX_ANTIQUE			= 14,
+	V4L2_COLORFX_SET_CBCR			= 15,
+};
+#define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
+#define V4L2_CID_BAND_STOP_FILTER		(V4L2_CID_BASE+33)
+
+#define V4L2_CID_ROTATE				(V4L2_CID_BASE+34)
+#define V4L2_CID_BG_COLOR			(V4L2_CID_BASE+35)
+
+#define V4L2_CID_CHROMA_GAIN                    (V4L2_CID_BASE+36)
+
+#define V4L2_CID_ILLUMINATORS_1			(V4L2_CID_BASE+37)
+#define V4L2_CID_ILLUMINATORS_2			(V4L2_CID_BASE+38)
+
+#define V4L2_CID_MIN_BUFFERS_FOR_CAPTURE	(V4L2_CID_BASE+39)
+#define V4L2_CID_MIN_BUFFERS_FOR_OUTPUT		(V4L2_CID_BASE+40)
+
+#define V4L2_CID_ALPHA_COMPONENT		(V4L2_CID_BASE+41)
+#define V4L2_CID_COLORFX_CBCR			(V4L2_CID_BASE+42)
+
+/* last CID + 1 */
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+43)
+
+
+/* MPEG-class control IDs */
+
+#define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
+#define V4L2_CID_MPEG_CLASS 			(V4L2_CTRL_CLASS_MPEG | 1)
+
+/*  MPEG streams, specific to multiplexed streams */
+#define V4L2_CID_MPEG_STREAM_TYPE 		(V4L2_CID_MPEG_BASE+0)
+enum v4l2_mpeg_stream_type {
+	V4L2_MPEG_STREAM_TYPE_MPEG2_PS   = 0, /* MPEG-2 program stream */
+	V4L2_MPEG_STREAM_TYPE_MPEG2_TS   = 1, /* MPEG-2 transport stream */
+	V4L2_MPEG_STREAM_TYPE_MPEG1_SS   = 2, /* MPEG-1 system stream */
+	V4L2_MPEG_STREAM_TYPE_MPEG2_DVD  = 3, /* MPEG-2 DVD-compatible stream */
+	V4L2_MPEG_STREAM_TYPE_MPEG1_VCD  = 4, /* MPEG-1 VCD-compatible stream */
+	V4L2_MPEG_STREAM_TYPE_MPEG2_SVCD = 5, /* MPEG-2 SVCD-compatible stream */
+};
+#define V4L2_CID_MPEG_STREAM_PID_PMT 		(V4L2_CID_MPEG_BASE+1)
+#define V4L2_CID_MPEG_STREAM_PID_AUDIO 		(V4L2_CID_MPEG_BASE+2)
+#define V4L2_CID_MPEG_STREAM_PID_VIDEO 		(V4L2_CID_MPEG_BASE+3)
+#define V4L2_CID_MPEG_STREAM_PID_PCR 		(V4L2_CID_MPEG_BASE+4)
+#define V4L2_CID_MPEG_STREAM_PES_ID_AUDIO 	(V4L2_CID_MPEG_BASE+5)
+#define V4L2_CID_MPEG_STREAM_PES_ID_VIDEO 	(V4L2_CID_MPEG_BASE+6)
+#define V4L2_CID_MPEG_STREAM_VBI_FMT 		(V4L2_CID_MPEG_BASE+7)
+enum v4l2_mpeg_stream_vbi_fmt {
+	V4L2_MPEG_STREAM_VBI_FMT_NONE = 0,  /* No VBI in the MPEG stream */
+	V4L2_MPEG_STREAM_VBI_FMT_IVTV = 1,  /* VBI in private packets, IVTV format */
+};
+
+/*  MPEG audio controls specific to multiplexed streams  */
+#define V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ 	(V4L2_CID_MPEG_BASE+100)
+enum v4l2_mpeg_audio_sampling_freq {
+	V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100 = 0,
+	V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000 = 1,
+	V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000 = 2,
+};
+#define V4L2_CID_MPEG_AUDIO_ENCODING 		(V4L2_CID_MPEG_BASE+101)
+enum v4l2_mpeg_audio_encoding {
+	V4L2_MPEG_AUDIO_ENCODING_LAYER_1 = 0,
+	V4L2_MPEG_AUDIO_ENCODING_LAYER_2 = 1,
+	V4L2_MPEG_AUDIO_ENCODING_LAYER_3 = 2,
+	V4L2_MPEG_AUDIO_ENCODING_AAC     = 3,
+	V4L2_MPEG_AUDIO_ENCODING_AC3     = 4,
+};
+#define V4L2_CID_MPEG_AUDIO_L1_BITRATE 		(V4L2_CID_MPEG_BASE+102)
+enum v4l2_mpeg_audio_l1_bitrate {
+	V4L2_MPEG_AUDIO_L1_BITRATE_32K  = 0,
+	V4L2_MPEG_AUDIO_L1_BITRATE_64K  = 1,
+	V4L2_MPEG_AUDIO_L1_BITRATE_96K  = 2,
+	V4L2_MPEG_AUDIO_L1_BITRATE_128K = 3,
+	V4L2_MPEG_AUDIO_L1_BITRATE_160K = 4,
+	V4L2_MPEG_AUDIO_L1_BITRATE_192K = 5,
+	V4L2_MPEG_AUDIO_L1_BITRATE_224K = 6,
+	V4L2_MPEG_AUDIO_L1_BITRATE_256K = 7,
+	V4L2_MPEG_AUDIO_L1_BITRATE_288K = 8,
+	V4L2_MPEG_AUDIO_L1_BITRATE_320K = 9,
+	V4L2_MPEG_AUDIO_L1_BITRATE_352K = 10,
+	V4L2_MPEG_AUDIO_L1_BITRATE_384K = 11,
+	V4L2_MPEG_AUDIO_L1_BITRATE_416K = 12,
+	V4L2_MPEG_AUDIO_L1_BITRATE_448K = 13,
+};
+#define V4L2_CID_MPEG_AUDIO_L2_BITRATE 		(V4L2_CID_MPEG_BASE+103)
+enum v4l2_mpeg_audio_l2_bitrate {
+	V4L2_MPEG_AUDIO_L2_BITRATE_32K  = 0,
+	V4L2_MPEG_AUDIO_L2_BITRATE_48K  = 1,
+	V4L2_MPEG_AUDIO_L2_BITRATE_56K  = 2,
+	V4L2_MPEG_AUDIO_L2_BITRATE_64K  = 3,
+	V4L2_MPEG_AUDIO_L2_BITRATE_80K  = 4,
+	V4L2_MPEG_AUDIO_L2_BITRATE_96K  = 5,
+	V4L2_MPEG_AUDIO_L2_BITRATE_112K = 6,
+	V4L2_MPEG_AUDIO_L2_BITRATE_128K = 7,
+	V4L2_MPEG_AUDIO_L2_BITRATE_160K = 8,
+	V4L2_MPEG_AUDIO_L2_BITRATE_192K = 9,
+	V4L2_MPEG_AUDIO_L2_BITRATE_224K = 10,
+	V4L2_MPEG_AUDIO_L2_BITRATE_256K = 11,
+	V4L2_MPEG_AUDIO_L2_BITRATE_320K = 12,
+	V4L2_MPEG_AUDIO_L2_BITRATE_384K = 13,
+};
+#define V4L2_CID_MPEG_AUDIO_L3_BITRATE 		(V4L2_CID_MPEG_BASE+104)
+enum v4l2_mpeg_audio_l3_bitrate {
+	V4L2_MPEG_AUDIO_L3_BITRATE_32K  = 0,
+	V4L2_MPEG_AUDIO_L3_BITRATE_40K  = 1,
+	V4L2_MPEG_AUDIO_L3_BITRATE_48K  = 2,
+	V4L2_MPEG_AUDIO_L3_BITRATE_56K  = 3,
+	V4L2_MPEG_AUDIO_L3_BITRATE_64K  = 4,
+	V4L2_MPEG_AUDIO_L3_BITRATE_80K  = 5,
+	V4L2_MPEG_AUDIO_L3_BITRATE_96K  = 6,
+	V4L2_MPEG_AUDIO_L3_BITRATE_112K = 7,
+	V4L2_MPEG_AUDIO_L3_BITRATE_128K = 8,
+	V4L2_MPEG_AUDIO_L3_BITRATE_160K = 9,
+	V4L2_MPEG_AUDIO_L3_BITRATE_192K = 10,
+	V4L2_MPEG_AUDIO_L3_BITRATE_224K = 11,
+	V4L2_MPEG_AUDIO_L3_BITRATE_256K = 12,
+	V4L2_MPEG_AUDIO_L3_BITRATE_320K = 13,
+};
+#define V4L2_CID_MPEG_AUDIO_MODE 		(V4L2_CID_MPEG_BASE+105)
+enum v4l2_mpeg_audio_mode {
+	V4L2_MPEG_AUDIO_MODE_STEREO       = 0,
+	V4L2_MPEG_AUDIO_MODE_JOINT_STEREO = 1,
+	V4L2_MPEG_AUDIO_MODE_DUAL         = 2,
+	V4L2_MPEG_AUDIO_MODE_MONO         = 3,
+};
+#define V4L2_CID_MPEG_AUDIO_MODE_EXTENSION 	(V4L2_CID_MPEG_BASE+106)
+enum v4l2_mpeg_audio_mode_extension {
+	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_4  = 0,
+	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_8  = 1,
+	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_12 = 2,
+	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_16 = 3,
+};
+#define V4L2_CID_MPEG_AUDIO_EMPHASIS 		(V4L2_CID_MPEG_BASE+107)
+enum v4l2_mpeg_audio_emphasis {
+	V4L2_MPEG_AUDIO_EMPHASIS_NONE         = 0,
+	V4L2_MPEG_AUDIO_EMPHASIS_50_DIV_15_uS = 1,
+	V4L2_MPEG_AUDIO_EMPHASIS_CCITT_J17    = 2,
+};
+#define V4L2_CID_MPEG_AUDIO_CRC 		(V4L2_CID_MPEG_BASE+108)
+enum v4l2_mpeg_audio_crc {
+	V4L2_MPEG_AUDIO_CRC_NONE  = 0,
+	V4L2_MPEG_AUDIO_CRC_CRC16 = 1,
+};
+#define V4L2_CID_MPEG_AUDIO_MUTE 		(V4L2_CID_MPEG_BASE+109)
+#define V4L2_CID_MPEG_AUDIO_AAC_BITRATE		(V4L2_CID_MPEG_BASE+110)
+#define V4L2_CID_MPEG_AUDIO_AC3_BITRATE		(V4L2_CID_MPEG_BASE+111)
+enum v4l2_mpeg_audio_ac3_bitrate {
+	V4L2_MPEG_AUDIO_AC3_BITRATE_32K  = 0,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_40K  = 1,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_48K  = 2,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_56K  = 3,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_64K  = 4,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_80K  = 5,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_96K  = 6,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_112K = 7,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_128K = 8,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_160K = 9,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_192K = 10,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_224K = 11,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_256K = 12,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_320K = 13,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_384K = 14,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_448K = 15,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_512K = 16,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_576K = 17,
+	V4L2_MPEG_AUDIO_AC3_BITRATE_640K = 18,
+};
+#define V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK	(V4L2_CID_MPEG_BASE+112)
+enum v4l2_mpeg_audio_dec_playback {
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_AUTO	    = 0,
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_STEREO	    = 1,
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_LEFT	    = 2,
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_RIGHT	    = 3,
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_MONO	    = 4,
+	V4L2_MPEG_AUDIO_DEC_PLAYBACK_SWAPPED_STEREO = 5,
+};
+#define V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK (V4L2_CID_MPEG_BASE+113)
+
+/*  MPEG video controls specific to multiplexed streams */
+#define V4L2_CID_MPEG_VIDEO_ENCODING 		(V4L2_CID_MPEG_BASE+200)
+enum v4l2_mpeg_video_encoding {
+	V4L2_MPEG_VIDEO_ENCODING_MPEG_1     = 0,
+	V4L2_MPEG_VIDEO_ENCODING_MPEG_2     = 1,
+	V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC = 2,
+};
+#define V4L2_CID_MPEG_VIDEO_ASPECT 		(V4L2_CID_MPEG_BASE+201)
+enum v4l2_mpeg_video_aspect {
+	V4L2_MPEG_VIDEO_ASPECT_1x1     = 0,
+	V4L2_MPEG_VIDEO_ASPECT_4x3     = 1,
+	V4L2_MPEG_VIDEO_ASPECT_16x9    = 2,
+	V4L2_MPEG_VIDEO_ASPECT_221x100 = 3,
+};
+#define V4L2_CID_MPEG_VIDEO_B_FRAMES 		(V4L2_CID_MPEG_BASE+202)
+#define V4L2_CID_MPEG_VIDEO_GOP_SIZE 		(V4L2_CID_MPEG_BASE+203)
+#define V4L2_CID_MPEG_VIDEO_GOP_CLOSURE 	(V4L2_CID_MPEG_BASE+204)
+#define V4L2_CID_MPEG_VIDEO_PULLDOWN 		(V4L2_CID_MPEG_BASE+205)
+#define V4L2_CID_MPEG_VIDEO_BITRATE_MODE 	(V4L2_CID_MPEG_BASE+206)
+enum v4l2_mpeg_video_bitrate_mode {
+	V4L2_MPEG_VIDEO_BITRATE_MODE_VBR = 0,
+	V4L2_MPEG_VIDEO_BITRATE_MODE_CBR = 1,
+};
+#define V4L2_CID_MPEG_VIDEO_BITRATE 		(V4L2_CID_MPEG_BASE+207)
+#define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK 	(V4L2_CID_MPEG_BASE+208)
+#define V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (V4L2_CID_MPEG_BASE+209)
+#define V4L2_CID_MPEG_VIDEO_MUTE 		(V4L2_CID_MPEG_BASE+210)
+#define V4L2_CID_MPEG_VIDEO_MUTE_YUV 		(V4L2_CID_MPEG_BASE+211)
+#define V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE		(V4L2_CID_MPEG_BASE+212)
+#define V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER	(V4L2_CID_MPEG_BASE+213)
+#define V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB		(V4L2_CID_MPEG_BASE+214)
+#define V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE			(V4L2_CID_MPEG_BASE+215)
+#define V4L2_CID_MPEG_VIDEO_HEADER_MODE				(V4L2_CID_MPEG_BASE+216)
+enum v4l2_mpeg_video_header_mode {
+	V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE			= 0,
+	V4L2_MPEG_VIDEO_HEADER_MODE_JOINED_WITH_1ST_FRAME	= 1,
+
+};
+#define V4L2_CID_MPEG_VIDEO_MAX_REF_PIC			(V4L2_CID_MPEG_BASE+217)
+#define V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE		(V4L2_CID_MPEG_BASE+218)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES	(V4L2_CID_MPEG_BASE+219)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB		(V4L2_CID_MPEG_BASE+220)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE		(V4L2_CID_MPEG_BASE+221)
+enum v4l2_mpeg_video_multi_slice_mode {
+	V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE		= 0,
+	V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB		= 1,
+	V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES	= 2,
+};
+#define V4L2_CID_MPEG_VIDEO_VBV_SIZE			(V4L2_CID_MPEG_BASE+222)
+#define V4L2_CID_MPEG_VIDEO_DEC_PTS			(V4L2_CID_MPEG_BASE+223)
+#define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_MPEG_BASE+224)
+
+#define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
+#define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
+#define V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP		(V4L2_CID_MPEG_BASE+302)
+#define V4L2_CID_MPEG_VIDEO_H263_MIN_QP			(V4L2_CID_MPEG_BASE+303)
+#define V4L2_CID_MPEG_VIDEO_H263_MAX_QP			(V4L2_CID_MPEG_BASE+304)
+#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP		(V4L2_CID_MPEG_BASE+350)
+#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP		(V4L2_CID_MPEG_BASE+351)
+#define V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP		(V4L2_CID_MPEG_BASE+352)
+#define V4L2_CID_MPEG_VIDEO_H264_MIN_QP			(V4L2_CID_MPEG_BASE+353)
+#define V4L2_CID_MPEG_VIDEO_H264_MAX_QP			(V4L2_CID_MPEG_BASE+354)
+#define V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM		(V4L2_CID_MPEG_BASE+355)
+#define V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE		(V4L2_CID_MPEG_BASE+356)
+#define V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE		(V4L2_CID_MPEG_BASE+357)
+enum v4l2_mpeg_video_h264_entropy_mode {
+	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC	= 0,
+	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC	= 1,
+};
+#define V4L2_CID_MPEG_VIDEO_H264_I_PERIOD		(V4L2_CID_MPEG_BASE+358)
+#define V4L2_CID_MPEG_VIDEO_H264_LEVEL			(V4L2_CID_MPEG_BASE+359)
+enum v4l2_mpeg_video_h264_level {
+	V4L2_MPEG_VIDEO_H264_LEVEL_1_0	= 0,
+	V4L2_MPEG_VIDEO_H264_LEVEL_1B	= 1,
+	V4L2_MPEG_VIDEO_H264_LEVEL_1_1	= 2,
+	V4L2_MPEG_VIDEO_H264_LEVEL_1_2	= 3,
+	V4L2_MPEG_VIDEO_H264_LEVEL_1_3	= 4,
+	V4L2_MPEG_VIDEO_H264_LEVEL_2_0	= 5,
+	V4L2_MPEG_VIDEO_H264_LEVEL_2_1	= 6,
+	V4L2_MPEG_VIDEO_H264_LEVEL_2_2	= 7,
+	V4L2_MPEG_VIDEO_H264_LEVEL_3_0	= 8,
+	V4L2_MPEG_VIDEO_H264_LEVEL_3_1	= 9,
+	V4L2_MPEG_VIDEO_H264_LEVEL_3_2	= 10,
+	V4L2_MPEG_VIDEO_H264_LEVEL_4_0	= 11,
+	V4L2_MPEG_VIDEO_H264_LEVEL_4_1	= 12,
+	V4L2_MPEG_VIDEO_H264_LEVEL_4_2	= 13,
+	V4L2_MPEG_VIDEO_H264_LEVEL_5_0	= 14,
+	V4L2_MPEG_VIDEO_H264_LEVEL_5_1	= 15,
+};
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA	(V4L2_CID_MPEG_BASE+360)
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA	(V4L2_CID_MPEG_BASE+361)
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE	(V4L2_CID_MPEG_BASE+362)
+enum v4l2_mpeg_video_h264_loop_filter_mode {
+	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED				= 0,
+	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED				= 1,
+	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY	= 2,
+};
+#define V4L2_CID_MPEG_VIDEO_H264_PROFILE		(V4L2_CID_MPEG_BASE+363)
+enum v4l2_mpeg_video_h264_profile {
+	V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE			= 0,
+	V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE	= 1,
+	V4L2_MPEG_VIDEO_H264_PROFILE_MAIN			= 2,
+	V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED			= 3,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH			= 4,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10			= 5,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422			= 6,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE	= 7,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA		= 8,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA		= 9,
+	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA		= 10,
+	V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA		= 11,
+	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE		= 12,
+	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH		= 13,
+	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA	= 14,
+	V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH		= 15,
+	V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
+};
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT	(V4L2_CID_MPEG_BASE+364)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH	(V4L2_CID_MPEG_BASE+365)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE		(V4L2_CID_MPEG_BASE+366)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC		(V4L2_CID_MPEG_BASE+367)
+enum v4l2_mpeg_video_h264_vui_sar_idc {
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_UNSPECIFIED	= 0,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_1x1		= 1,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_12x11		= 2,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_10x11		= 3,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_16x11		= 4,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_40x33		= 5,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_24x11		= 6,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_20x11		= 7,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_32x11		= 8,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_80x33		= 9,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_18x11		= 10,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_15x11		= 11,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_64x33		= 12,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_160x99		= 13,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_4x3		= 14,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_3x2		= 15,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_2x1		= 16,
+	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_EXTENDED	= 17,
+};
+#define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_MPEG_BASE+400)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_MPEG_BASE+401)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_MPEG_BASE+402)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP	(V4L2_CID_MPEG_BASE+403)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP	(V4L2_CID_MPEG_BASE+404)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL		(V4L2_CID_MPEG_BASE+405)
+enum v4l2_mpeg_video_mpeg4_level {
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0	= 0,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0B	= 1,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_1	= 2,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_2	= 3,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_3	= 4,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_3B	= 5,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_4	= 6,
+	V4L2_MPEG_VIDEO_MPEG4_LEVEL_5	= 7,
+};
+#define V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE	(V4L2_CID_MPEG_BASE+406)
+enum v4l2_mpeg_video_mpeg4_profile {
+	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE				= 0,
+	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_SIMPLE			= 1,
+	V4L2_MPEG_VIDEO_MPEG4_PROFILE_CORE				= 2,
+	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE_SCALABLE			= 3,
+	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_CODING_EFFICIENCY	= 4,
+};
+#define V4L2_CID_MPEG_VIDEO_MPEG4_QPEL		(V4L2_CID_MPEG_BASE+407)
+
+/*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
+#define V4L2_CID_MPEG_CX2341X_BASE 				(V4L2_CTRL_CLASS_MPEG | 0x1000)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+0)
+enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
+	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_MANUAL = 0,
+	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_AUTO   = 1,
+};
+#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER 		(V4L2_CID_MPEG_CX2341X_BASE+1)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE 	(V4L2_CID_MPEG_CX2341X_BASE+2)
+enum v4l2_mpeg_cx2341x_video_luma_spatial_filter_type {
+	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_OFF                  = 0,
+	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_HOR               = 1,
+	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_VERT              = 2,
+	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_HV_SEPARABLE      = 3,
+	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_SYM_NON_SEPARABLE = 4,
+};
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE 	(V4L2_CID_MPEG_CX2341X_BASE+3)
+enum v4l2_mpeg_cx2341x_video_chroma_spatial_filter_type {
+	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_OFF    = 0,
+	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_1D_HOR = 1,
+};
+#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+4)
+enum v4l2_mpeg_cx2341x_video_temporal_filter_mode {
+	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_MANUAL = 0,
+	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_AUTO   = 1,
+};
+#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER 		(V4L2_CID_MPEG_CX2341X_BASE+5)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE 		(V4L2_CID_MPEG_CX2341X_BASE+6)
+enum v4l2_mpeg_cx2341x_video_median_filter_type {
+	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_OFF      = 0,
+	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR      = 1,
+	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_VERT     = 2,
+	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR_VERT = 3,
+	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_DIAG     = 4,
+};
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM 	(V4L2_CID_MPEG_CX2341X_BASE+7)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP 	(V4L2_CID_MPEG_CX2341X_BASE+8)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_MPEG_CX2341X_BASE+9)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP 	(V4L2_CID_MPEG_CX2341X_BASE+10)
+#define V4L2_CID_MPEG_CX2341X_STREAM_INSERT_NAV_PACKETS 	(V4L2_CID_MPEG_CX2341X_BASE+11)
+
+/*  MPEG-class control IDs specific to the Samsung MFC 5.1 driver as defined by V4L2 */
+#define V4L2_CID_MPEG_MFC51_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1100)
+
+#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY		(V4L2_CID_MPEG_MFC51_BASE+0)
+#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY_ENABLE	(V4L2_CID_MPEG_MFC51_BASE+1)
+#define V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE			(V4L2_CID_MPEG_MFC51_BASE+2)
+enum v4l2_mpeg_mfc51_video_frame_skip_mode {
+	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_DISABLED		= 0,
+	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT	= 1,
+	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT		= 2,
+};
+#define V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE			(V4L2_CID_MPEG_MFC51_BASE+3)
+enum v4l2_mpeg_mfc51_video_force_frame_type {
+	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_DISABLED		= 0,
+	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_I_FRAME		= 1,
+	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_NOT_CODED	= 2,
+};
+#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING				(V4L2_CID_MPEG_MFC51_BASE+4)
+#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING_YUV				(V4L2_CID_MPEG_MFC51_BASE+5)
+#define V4L2_CID_MPEG_MFC51_VIDEO_RC_FIXED_TARGET_BIT			(V4L2_CID_MPEG_MFC51_BASE+6)
+#define V4L2_CID_MPEG_MFC51_VIDEO_RC_REACTION_COEFF			(V4L2_CID_MPEG_MFC51_BASE+7)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_ACTIVITY		(V4L2_CID_MPEG_MFC51_BASE+50)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_DARK			(V4L2_CID_MPEG_MFC51_BASE+51)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_SMOOTH		(V4L2_CID_MPEG_MFC51_BASE+52)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_STATIC		(V4L2_CID_MPEG_MFC51_BASE+53)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_NUM_REF_PIC_FOR_P		(V4L2_CID_MPEG_MFC51_BASE+54)
+
+
+/*  Camera class control IDs */
+
+#define V4L2_CID_CAMERA_CLASS_BASE 	(V4L2_CTRL_CLASS_CAMERA | 0x900)
+#define V4L2_CID_CAMERA_CLASS 		(V4L2_CTRL_CLASS_CAMERA | 1)
+
+#define V4L2_CID_EXPOSURE_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+1)
+enum  v4l2_exposure_auto_type {
+	V4L2_EXPOSURE_AUTO = 0,
+	V4L2_EXPOSURE_MANUAL = 1,
+	V4L2_EXPOSURE_SHUTTER_PRIORITY = 2,
+	V4L2_EXPOSURE_APERTURE_PRIORITY = 3
+};
+#define V4L2_CID_EXPOSURE_ABSOLUTE		(V4L2_CID_CAMERA_CLASS_BASE+2)
+#define V4L2_CID_EXPOSURE_AUTO_PRIORITY		(V4L2_CID_CAMERA_CLASS_BASE+3)
+
+#define V4L2_CID_PAN_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+4)
+#define V4L2_CID_TILT_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+5)
+#define V4L2_CID_PAN_RESET			(V4L2_CID_CAMERA_CLASS_BASE+6)
+#define V4L2_CID_TILT_RESET			(V4L2_CID_CAMERA_CLASS_BASE+7)
+
+#define V4L2_CID_PAN_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+8)
+#define V4L2_CID_TILT_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+9)
+
+#define V4L2_CID_FOCUS_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+10)
+#define V4L2_CID_FOCUS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+11)
+#define V4L2_CID_FOCUS_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+12)
+
+#define V4L2_CID_ZOOM_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+13)
+#define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
+#define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
+
+#define V4L2_CID_PRIVACY			(V4L2_CID_CAMERA_CLASS_BASE+16)
+
+#define V4L2_CID_IRIS_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+17)
+#define V4L2_CID_IRIS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+18)
+
+#define V4L2_CID_AUTO_EXPOSURE_BIAS		(V4L2_CID_CAMERA_CLASS_BASE+19)
+
+#define V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE	(V4L2_CID_CAMERA_CLASS_BASE+20)
+enum v4l2_auto_n_preset_white_balance {
+	V4L2_WHITE_BALANCE_MANUAL		= 0,
+	V4L2_WHITE_BALANCE_AUTO			= 1,
+	V4L2_WHITE_BALANCE_INCANDESCENT		= 2,
+	V4L2_WHITE_BALANCE_FLUORESCENT		= 3,
+	V4L2_WHITE_BALANCE_FLUORESCENT_H	= 4,
+	V4L2_WHITE_BALANCE_HORIZON		= 5,
+	V4L2_WHITE_BALANCE_DAYLIGHT		= 6,
+	V4L2_WHITE_BALANCE_FLASH		= 7,
+	V4L2_WHITE_BALANCE_CLOUDY		= 8,
+	V4L2_WHITE_BALANCE_SHADE		= 9,
+};
+
+#define V4L2_CID_WIDE_DYNAMIC_RANGE		(V4L2_CID_CAMERA_CLASS_BASE+21)
+#define V4L2_CID_IMAGE_STABILIZATION		(V4L2_CID_CAMERA_CLASS_BASE+22)
+
+#define V4L2_CID_ISO_SENSITIVITY		(V4L2_CID_CAMERA_CLASS_BASE+23)
+#define V4L2_CID_ISO_SENSITIVITY_AUTO		(V4L2_CID_CAMERA_CLASS_BASE+24)
+enum v4l2_iso_sensitivity_auto_type {
+	V4L2_ISO_SENSITIVITY_MANUAL		= 0,
+	V4L2_ISO_SENSITIVITY_AUTO		= 1,
+};
+
+#define V4L2_CID_EXPOSURE_METERING		(V4L2_CID_CAMERA_CLASS_BASE+25)
+enum v4l2_exposure_metering {
+	V4L2_EXPOSURE_METERING_AVERAGE		= 0,
+	V4L2_EXPOSURE_METERING_CENTER_WEIGHTED	= 1,
+	V4L2_EXPOSURE_METERING_SPOT		= 2,
+};
+
+#define V4L2_CID_SCENE_MODE			(V4L2_CID_CAMERA_CLASS_BASE+26)
+enum v4l2_scene_mode {
+	V4L2_SCENE_MODE_NONE			= 0,
+	V4L2_SCENE_MODE_BACKLIGHT		= 1,
+	V4L2_SCENE_MODE_BEACH_SNOW		= 2,
+	V4L2_SCENE_MODE_CANDLE_LIGHT		= 3,
+	V4L2_SCENE_MODE_DAWN_DUSK		= 4,
+	V4L2_SCENE_MODE_FALL_COLORS		= 5,
+	V4L2_SCENE_MODE_FIREWORKS		= 6,
+	V4L2_SCENE_MODE_LANDSCAPE		= 7,
+	V4L2_SCENE_MODE_NIGHT			= 8,
+	V4L2_SCENE_MODE_PARTY_INDOOR		= 9,
+	V4L2_SCENE_MODE_PORTRAIT		= 10,
+	V4L2_SCENE_MODE_SPORTS			= 11,
+	V4L2_SCENE_MODE_SUNSET			= 12,
+	V4L2_SCENE_MODE_TEXT			= 13,
+};
+
+#define V4L2_CID_3A_LOCK			(V4L2_CID_CAMERA_CLASS_BASE+27)
+#define V4L2_LOCK_EXPOSURE			(1 << 0)
+#define V4L2_LOCK_WHITE_BALANCE			(1 << 1)
+#define V4L2_LOCK_FOCUS				(1 << 2)
+
+#define V4L2_CID_AUTO_FOCUS_START		(V4L2_CID_CAMERA_CLASS_BASE+28)
+#define V4L2_CID_AUTO_FOCUS_STOP		(V4L2_CID_CAMERA_CLASS_BASE+29)
+#define V4L2_CID_AUTO_FOCUS_STATUS		(V4L2_CID_CAMERA_CLASS_BASE+30)
+#define V4L2_AUTO_FOCUS_STATUS_IDLE		(0 << 0)
+#define V4L2_AUTO_FOCUS_STATUS_BUSY		(1 << 0)
+#define V4L2_AUTO_FOCUS_STATUS_REACHED		(1 << 1)
+#define V4L2_AUTO_FOCUS_STATUS_FAILED		(1 << 2)
+
+#define V4L2_CID_AUTO_FOCUS_RANGE		(V4L2_CID_CAMERA_CLASS_BASE+31)
+enum v4l2_auto_focus_range {
+	V4L2_AUTO_FOCUS_RANGE_AUTO		= 0,
+	V4L2_AUTO_FOCUS_RANGE_NORMAL		= 1,
+	V4L2_AUTO_FOCUS_RANGE_MACRO		= 2,
+	V4L2_AUTO_FOCUS_RANGE_INFINITY		= 3,
+};
+
+
+/* FM Modulator class control IDs */
+
+#define V4L2_CID_FM_TX_CLASS_BASE		(V4L2_CTRL_CLASS_FM_TX | 0x900)
+#define V4L2_CID_FM_TX_CLASS			(V4L2_CTRL_CLASS_FM_TX | 1)
+
+#define V4L2_CID_RDS_TX_DEVIATION		(V4L2_CID_FM_TX_CLASS_BASE + 1)
+#define V4L2_CID_RDS_TX_PI			(V4L2_CID_FM_TX_CLASS_BASE + 2)
+#define V4L2_CID_RDS_TX_PTY			(V4L2_CID_FM_TX_CLASS_BASE + 3)
+#define V4L2_CID_RDS_TX_PS_NAME			(V4L2_CID_FM_TX_CLASS_BASE + 5)
+#define V4L2_CID_RDS_TX_RADIO_TEXT		(V4L2_CID_FM_TX_CLASS_BASE + 6)
+
+#define V4L2_CID_AUDIO_LIMITER_ENABLED		(V4L2_CID_FM_TX_CLASS_BASE + 64)
+#define V4L2_CID_AUDIO_LIMITER_RELEASE_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 65)
+#define V4L2_CID_AUDIO_LIMITER_DEVIATION	(V4L2_CID_FM_TX_CLASS_BASE + 66)
+
+#define V4L2_CID_AUDIO_COMPRESSION_ENABLED	(V4L2_CID_FM_TX_CLASS_BASE + 80)
+#define V4L2_CID_AUDIO_COMPRESSION_GAIN		(V4L2_CID_FM_TX_CLASS_BASE + 81)
+#define V4L2_CID_AUDIO_COMPRESSION_THRESHOLD	(V4L2_CID_FM_TX_CLASS_BASE + 82)
+#define V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 83)
+#define V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 84)
+
+#define V4L2_CID_PILOT_TONE_ENABLED		(V4L2_CID_FM_TX_CLASS_BASE + 96)
+#define V4L2_CID_PILOT_TONE_DEVIATION		(V4L2_CID_FM_TX_CLASS_BASE + 97)
+#define V4L2_CID_PILOT_TONE_FREQUENCY		(V4L2_CID_FM_TX_CLASS_BASE + 98)
+
+#define V4L2_CID_TUNE_PREEMPHASIS		(V4L2_CID_FM_TX_CLASS_BASE + 112)
+enum v4l2_preemphasis {
+	V4L2_PREEMPHASIS_DISABLED	= 0,
+	V4L2_PREEMPHASIS_50_uS		= 1,
+	V4L2_PREEMPHASIS_75_uS		= 2,
+};
+#define V4L2_CID_TUNE_POWER_LEVEL		(V4L2_CID_FM_TX_CLASS_BASE + 113)
+#define V4L2_CID_TUNE_ANTENNA_CAPACITOR		(V4L2_CID_FM_TX_CLASS_BASE + 114)
+
+
+/* Flash and privacy (indicator) light controls */
+
+#define V4L2_CID_FLASH_CLASS_BASE		(V4L2_CTRL_CLASS_FLASH | 0x900)
+#define V4L2_CID_FLASH_CLASS			(V4L2_CTRL_CLASS_FLASH | 1)
+
+#define V4L2_CID_FLASH_LED_MODE			(V4L2_CID_FLASH_CLASS_BASE + 1)
+enum v4l2_flash_led_mode {
+	V4L2_FLASH_LED_MODE_NONE,
+	V4L2_FLASH_LED_MODE_FLASH,
+	V4L2_FLASH_LED_MODE_TORCH,
+};
+
+#define V4L2_CID_FLASH_STROBE_SOURCE		(V4L2_CID_FLASH_CLASS_BASE + 2)
+enum v4l2_flash_strobe_source {
+	V4L2_FLASH_STROBE_SOURCE_SOFTWARE,
+	V4L2_FLASH_STROBE_SOURCE_EXTERNAL,
+};
+
+#define V4L2_CID_FLASH_STROBE			(V4L2_CID_FLASH_CLASS_BASE + 3)
+#define V4L2_CID_FLASH_STROBE_STOP		(V4L2_CID_FLASH_CLASS_BASE + 4)
+#define V4L2_CID_FLASH_STROBE_STATUS		(V4L2_CID_FLASH_CLASS_BASE + 5)
+
+#define V4L2_CID_FLASH_TIMEOUT			(V4L2_CID_FLASH_CLASS_BASE + 6)
+#define V4L2_CID_FLASH_INTENSITY		(V4L2_CID_FLASH_CLASS_BASE + 7)
+#define V4L2_CID_FLASH_TORCH_INTENSITY		(V4L2_CID_FLASH_CLASS_BASE + 8)
+#define V4L2_CID_FLASH_INDICATOR_INTENSITY	(V4L2_CID_FLASH_CLASS_BASE + 9)
+
+#define V4L2_CID_FLASH_FAULT			(V4L2_CID_FLASH_CLASS_BASE + 10)
+#define V4L2_FLASH_FAULT_OVER_VOLTAGE		(1 << 0)
+#define V4L2_FLASH_FAULT_TIMEOUT		(1 << 1)
+#define V4L2_FLASH_FAULT_OVER_TEMPERATURE	(1 << 2)
+#define V4L2_FLASH_FAULT_SHORT_CIRCUIT		(1 << 3)
+#define V4L2_FLASH_FAULT_OVER_CURRENT		(1 << 4)
+#define V4L2_FLASH_FAULT_INDICATOR		(1 << 5)
+
+#define V4L2_CID_FLASH_CHARGE			(V4L2_CID_FLASH_CLASS_BASE + 11)
+#define V4L2_CID_FLASH_READY			(V4L2_CID_FLASH_CLASS_BASE + 12)
+
+
+/* JPEG-class control IDs */
+
+#define V4L2_CID_JPEG_CLASS_BASE		(V4L2_CTRL_CLASS_JPEG | 0x900)
+#define V4L2_CID_JPEG_CLASS			(V4L2_CTRL_CLASS_JPEG | 1)
+
+#define	V4L2_CID_JPEG_CHROMA_SUBSAMPLING	(V4L2_CID_JPEG_CLASS_BASE + 1)
+enum v4l2_jpeg_chroma_subsampling {
+	V4L2_JPEG_CHROMA_SUBSAMPLING_444	= 0,
+	V4L2_JPEG_CHROMA_SUBSAMPLING_422	= 1,
+	V4L2_JPEG_CHROMA_SUBSAMPLING_420	= 2,
+	V4L2_JPEG_CHROMA_SUBSAMPLING_411	= 3,
+	V4L2_JPEG_CHROMA_SUBSAMPLING_410	= 4,
+	V4L2_JPEG_CHROMA_SUBSAMPLING_GRAY	= 5,
+};
+#define	V4L2_CID_JPEG_RESTART_INTERVAL		(V4L2_CID_JPEG_CLASS_BASE + 2)
+#define	V4L2_CID_JPEG_COMPRESSION_QUALITY	(V4L2_CID_JPEG_CLASS_BASE + 3)
+
+#define	V4L2_CID_JPEG_ACTIVE_MARKER		(V4L2_CID_JPEG_CLASS_BASE + 4)
+#define	V4L2_JPEG_ACTIVE_MARKER_APP0		(1 << 0)
+#define	V4L2_JPEG_ACTIVE_MARKER_APP1		(1 << 1)
+#define	V4L2_JPEG_ACTIVE_MARKER_COM		(1 << 16)
+#define	V4L2_JPEG_ACTIVE_MARKER_DQT		(1 << 17)
+#define	V4L2_JPEG_ACTIVE_MARKER_DHT		(1 << 18)
+
+/* Image source controls */
+#define V4L2_CID_IMAGE_SOURCE_CLASS_BASE	(V4L2_CTRL_CLASS_IMAGE_SOURCE | 0x900)
+#define V4L2_CID_IMAGE_SOURCE_CLASS		(V4L2_CTRL_CLASS_IMAGE_SOURCE | 1)
+
+#define V4L2_CID_VBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 1)
+#define V4L2_CID_HBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 2)
+#define V4L2_CID_ANALOGUE_GAIN			(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 3)
+
+
+/* Image processing controls */
+
+#define V4L2_CID_IMAGE_PROC_CLASS_BASE		(V4L2_CTRL_CLASS_IMAGE_PROC | 0x900)
+#define V4L2_CID_IMAGE_PROC_CLASS		(V4L2_CTRL_CLASS_IMAGE_PROC | 1)
+
+#define V4L2_CID_LINK_FREQ			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 1)
+#define V4L2_CID_PIXEL_RATE			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 2)
+
+#endif
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4862165e195e..1b72a38b7c8c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1,7 +1,7 @@
 /*
  *  Video for Linux Two header file
  *
- *  Copyright (C) 1999-2007 the contributors
+ *  Copyright (C) 1999-2012 the contributors
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -65,6 +65,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 #include <linux/v4l2-common.h>
+#include <linux/v4l2-controls.h>
 
 /*
  * Common stuff for both V4L1 and V4L2
@@ -1242,17 +1243,6 @@ struct v4l2_ext_controls {
 	struct v4l2_ext_control *controls;
 };
 
-/*  Values for ctrl_class field */
-#define V4L2_CTRL_CLASS_USER 0x00980000	/* Old-style 'user' controls */
-#define V4L2_CTRL_CLASS_MPEG 0x00990000	/* MPEG-compression controls */
-#define V4L2_CTRL_CLASS_CAMERA 0x009a0000	/* Camera class controls */
-#define V4L2_CTRL_CLASS_FM_TX 0x009b0000	/* FM Modulator control class */
-#define V4L2_CTRL_CLASS_FLASH 0x009c0000	/* Camera flash controls */
-#define V4L2_CTRL_CLASS_JPEG 0x009d0000		/* JPEG-compression controls */
-#define V4L2_CTRL_CLASS_IMAGE_SOURCE 0x009e0000	/* Image source controls */
-#define V4L2_CTRL_CLASS_IMAGE_PROC 0x009f0000	/* Image processing controls */
-#define V4L2_CTRL_CLASS_DV 0x00a00000		/* Digital Video controls */
-
 #define V4L2_CTRL_ID_MASK      	  (0x0fffffff)
 #define V4L2_CTRL_ID2CLASS(id)    ((id) & 0x0fff0000UL)
 #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000)
@@ -1308,692 +1298,9 @@ struct v4l2_querymenu {
 
 /*  User-class control IDs defined by V4L2 */
 #define V4L2_CID_MAX_CTRLS		1024
-#define V4L2_CID_BASE			(V4L2_CTRL_CLASS_USER | 0x900)
-#define V4L2_CID_USER_BASE 		V4L2_CID_BASE
 /*  IDs reserved for driver specific controls */
 #define V4L2_CID_PRIVATE_BASE		0x08000000
 
-#define V4L2_CID_USER_CLASS 		(V4L2_CTRL_CLASS_USER | 1)
-#define V4L2_CID_BRIGHTNESS		(V4L2_CID_BASE+0)
-#define V4L2_CID_CONTRAST		(V4L2_CID_BASE+1)
-#define V4L2_CID_SATURATION		(V4L2_CID_BASE+2)
-#define V4L2_CID_HUE			(V4L2_CID_BASE+3)
-#define V4L2_CID_AUDIO_VOLUME		(V4L2_CID_BASE+5)
-#define V4L2_CID_AUDIO_BALANCE		(V4L2_CID_BASE+6)
-#define V4L2_CID_AUDIO_BASS		(V4L2_CID_BASE+7)
-#define V4L2_CID_AUDIO_TREBLE		(V4L2_CID_BASE+8)
-#define V4L2_CID_AUDIO_MUTE		(V4L2_CID_BASE+9)
-#define V4L2_CID_AUDIO_LOUDNESS		(V4L2_CID_BASE+10)
-#define V4L2_CID_BLACK_LEVEL		(V4L2_CID_BASE+11) /* Deprecated */
-#define V4L2_CID_AUTO_WHITE_BALANCE	(V4L2_CID_BASE+12)
-#define V4L2_CID_DO_WHITE_BALANCE	(V4L2_CID_BASE+13)
-#define V4L2_CID_RED_BALANCE		(V4L2_CID_BASE+14)
-#define V4L2_CID_BLUE_BALANCE		(V4L2_CID_BASE+15)
-#define V4L2_CID_GAMMA			(V4L2_CID_BASE+16)
-#define V4L2_CID_WHITENESS		(V4L2_CID_GAMMA) /* Deprecated */
-#define V4L2_CID_EXPOSURE		(V4L2_CID_BASE+17)
-#define V4L2_CID_AUTOGAIN		(V4L2_CID_BASE+18)
-#define V4L2_CID_GAIN			(V4L2_CID_BASE+19)
-#define V4L2_CID_HFLIP			(V4L2_CID_BASE+20)
-#define V4L2_CID_VFLIP			(V4L2_CID_BASE+21)
-
-/* Deprecated; use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */
-#define V4L2_CID_HCENTER		(V4L2_CID_BASE+22)
-#define V4L2_CID_VCENTER		(V4L2_CID_BASE+23)
-
-#define V4L2_CID_POWER_LINE_FREQUENCY	(V4L2_CID_BASE+24)
-enum v4l2_power_line_frequency {
-	V4L2_CID_POWER_LINE_FREQUENCY_DISABLED	= 0,
-	V4L2_CID_POWER_LINE_FREQUENCY_50HZ	= 1,
-	V4L2_CID_POWER_LINE_FREQUENCY_60HZ	= 2,
-	V4L2_CID_POWER_LINE_FREQUENCY_AUTO	= 3,
-};
-#define V4L2_CID_HUE_AUTO			(V4L2_CID_BASE+25)
-#define V4L2_CID_WHITE_BALANCE_TEMPERATURE	(V4L2_CID_BASE+26)
-#define V4L2_CID_SHARPNESS			(V4L2_CID_BASE+27)
-#define V4L2_CID_BACKLIGHT_COMPENSATION 	(V4L2_CID_BASE+28)
-#define V4L2_CID_CHROMA_AGC                     (V4L2_CID_BASE+29)
-#define V4L2_CID_COLOR_KILLER                   (V4L2_CID_BASE+30)
-#define V4L2_CID_COLORFX			(V4L2_CID_BASE+31)
-enum v4l2_colorfx {
-	V4L2_COLORFX_NONE			= 0,
-	V4L2_COLORFX_BW				= 1,
-	V4L2_COLORFX_SEPIA			= 2,
-	V4L2_COLORFX_NEGATIVE			= 3,
-	V4L2_COLORFX_EMBOSS			= 4,
-	V4L2_COLORFX_SKETCH			= 5,
-	V4L2_COLORFX_SKY_BLUE			= 6,
-	V4L2_COLORFX_GRASS_GREEN		= 7,
-	V4L2_COLORFX_SKIN_WHITEN		= 8,
-	V4L2_COLORFX_VIVID			= 9,
-	V4L2_COLORFX_AQUA			= 10,
-	V4L2_COLORFX_ART_FREEZE			= 11,
-	V4L2_COLORFX_SILHOUETTE			= 12,
-	V4L2_COLORFX_SOLARIZATION		= 13,
-	V4L2_COLORFX_ANTIQUE			= 14,
-	V4L2_COLORFX_SET_CBCR			= 15,
-};
-#define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
-#define V4L2_CID_BAND_STOP_FILTER		(V4L2_CID_BASE+33)
-
-#define V4L2_CID_ROTATE				(V4L2_CID_BASE+34)
-#define V4L2_CID_BG_COLOR			(V4L2_CID_BASE+35)
-
-#define V4L2_CID_CHROMA_GAIN                    (V4L2_CID_BASE+36)
-
-#define V4L2_CID_ILLUMINATORS_1			(V4L2_CID_BASE+37)
-#define V4L2_CID_ILLUMINATORS_2			(V4L2_CID_BASE+38)
-
-#define V4L2_CID_MIN_BUFFERS_FOR_CAPTURE	(V4L2_CID_BASE+39)
-#define V4L2_CID_MIN_BUFFERS_FOR_OUTPUT		(V4L2_CID_BASE+40)
-
-#define V4L2_CID_ALPHA_COMPONENT		(V4L2_CID_BASE+41)
-#define V4L2_CID_COLORFX_CBCR			(V4L2_CID_BASE+42)
-
-/* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+43)
-
-/*  MPEG-class control IDs defined by V4L2 */
-#define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-#define V4L2_CID_MPEG_CLASS 			(V4L2_CTRL_CLASS_MPEG | 1)
-
-/*  MPEG streams, specific to multiplexed streams */
-#define V4L2_CID_MPEG_STREAM_TYPE 		(V4L2_CID_MPEG_BASE+0)
-enum v4l2_mpeg_stream_type {
-	V4L2_MPEG_STREAM_TYPE_MPEG2_PS   = 0, /* MPEG-2 program stream */
-	V4L2_MPEG_STREAM_TYPE_MPEG2_TS   = 1, /* MPEG-2 transport stream */
-	V4L2_MPEG_STREAM_TYPE_MPEG1_SS   = 2, /* MPEG-1 system stream */
-	V4L2_MPEG_STREAM_TYPE_MPEG2_DVD  = 3, /* MPEG-2 DVD-compatible stream */
-	V4L2_MPEG_STREAM_TYPE_MPEG1_VCD  = 4, /* MPEG-1 VCD-compatible stream */
-	V4L2_MPEG_STREAM_TYPE_MPEG2_SVCD = 5, /* MPEG-2 SVCD-compatible stream */
-};
-#define V4L2_CID_MPEG_STREAM_PID_PMT 		(V4L2_CID_MPEG_BASE+1)
-#define V4L2_CID_MPEG_STREAM_PID_AUDIO 		(V4L2_CID_MPEG_BASE+2)
-#define V4L2_CID_MPEG_STREAM_PID_VIDEO 		(V4L2_CID_MPEG_BASE+3)
-#define V4L2_CID_MPEG_STREAM_PID_PCR 		(V4L2_CID_MPEG_BASE+4)
-#define V4L2_CID_MPEG_STREAM_PES_ID_AUDIO 	(V4L2_CID_MPEG_BASE+5)
-#define V4L2_CID_MPEG_STREAM_PES_ID_VIDEO 	(V4L2_CID_MPEG_BASE+6)
-#define V4L2_CID_MPEG_STREAM_VBI_FMT 		(V4L2_CID_MPEG_BASE+7)
-enum v4l2_mpeg_stream_vbi_fmt {
-	V4L2_MPEG_STREAM_VBI_FMT_NONE = 0,  /* No VBI in the MPEG stream */
-	V4L2_MPEG_STREAM_VBI_FMT_IVTV = 1,  /* VBI in private packets, IVTV format */
-};
-
-/*  MPEG audio controls specific to multiplexed streams  */
-#define V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ 	(V4L2_CID_MPEG_BASE+100)
-enum v4l2_mpeg_audio_sampling_freq {
-	V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100 = 0,
-	V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000 = 1,
-	V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000 = 2,
-};
-#define V4L2_CID_MPEG_AUDIO_ENCODING 		(V4L2_CID_MPEG_BASE+101)
-enum v4l2_mpeg_audio_encoding {
-	V4L2_MPEG_AUDIO_ENCODING_LAYER_1 = 0,
-	V4L2_MPEG_AUDIO_ENCODING_LAYER_2 = 1,
-	V4L2_MPEG_AUDIO_ENCODING_LAYER_3 = 2,
-	V4L2_MPEG_AUDIO_ENCODING_AAC     = 3,
-	V4L2_MPEG_AUDIO_ENCODING_AC3     = 4,
-};
-#define V4L2_CID_MPEG_AUDIO_L1_BITRATE 		(V4L2_CID_MPEG_BASE+102)
-enum v4l2_mpeg_audio_l1_bitrate {
-	V4L2_MPEG_AUDIO_L1_BITRATE_32K  = 0,
-	V4L2_MPEG_AUDIO_L1_BITRATE_64K  = 1,
-	V4L2_MPEG_AUDIO_L1_BITRATE_96K  = 2,
-	V4L2_MPEG_AUDIO_L1_BITRATE_128K = 3,
-	V4L2_MPEG_AUDIO_L1_BITRATE_160K = 4,
-	V4L2_MPEG_AUDIO_L1_BITRATE_192K = 5,
-	V4L2_MPEG_AUDIO_L1_BITRATE_224K = 6,
-	V4L2_MPEG_AUDIO_L1_BITRATE_256K = 7,
-	V4L2_MPEG_AUDIO_L1_BITRATE_288K = 8,
-	V4L2_MPEG_AUDIO_L1_BITRATE_320K = 9,
-	V4L2_MPEG_AUDIO_L1_BITRATE_352K = 10,
-	V4L2_MPEG_AUDIO_L1_BITRATE_384K = 11,
-	V4L2_MPEG_AUDIO_L1_BITRATE_416K = 12,
-	V4L2_MPEG_AUDIO_L1_BITRATE_448K = 13,
-};
-#define V4L2_CID_MPEG_AUDIO_L2_BITRATE 		(V4L2_CID_MPEG_BASE+103)
-enum v4l2_mpeg_audio_l2_bitrate {
-	V4L2_MPEG_AUDIO_L2_BITRATE_32K  = 0,
-	V4L2_MPEG_AUDIO_L2_BITRATE_48K  = 1,
-	V4L2_MPEG_AUDIO_L2_BITRATE_56K  = 2,
-	V4L2_MPEG_AUDIO_L2_BITRATE_64K  = 3,
-	V4L2_MPEG_AUDIO_L2_BITRATE_80K  = 4,
-	V4L2_MPEG_AUDIO_L2_BITRATE_96K  = 5,
-	V4L2_MPEG_AUDIO_L2_BITRATE_112K = 6,
-	V4L2_MPEG_AUDIO_L2_BITRATE_128K = 7,
-	V4L2_MPEG_AUDIO_L2_BITRATE_160K = 8,
-	V4L2_MPEG_AUDIO_L2_BITRATE_192K = 9,
-	V4L2_MPEG_AUDIO_L2_BITRATE_224K = 10,
-	V4L2_MPEG_AUDIO_L2_BITRATE_256K = 11,
-	V4L2_MPEG_AUDIO_L2_BITRATE_320K = 12,
-	V4L2_MPEG_AUDIO_L2_BITRATE_384K = 13,
-};
-#define V4L2_CID_MPEG_AUDIO_L3_BITRATE 		(V4L2_CID_MPEG_BASE+104)
-enum v4l2_mpeg_audio_l3_bitrate {
-	V4L2_MPEG_AUDIO_L3_BITRATE_32K  = 0,
-	V4L2_MPEG_AUDIO_L3_BITRATE_40K  = 1,
-	V4L2_MPEG_AUDIO_L3_BITRATE_48K  = 2,
-	V4L2_MPEG_AUDIO_L3_BITRATE_56K  = 3,
-	V4L2_MPEG_AUDIO_L3_BITRATE_64K  = 4,
-	V4L2_MPEG_AUDIO_L3_BITRATE_80K  = 5,
-	V4L2_MPEG_AUDIO_L3_BITRATE_96K  = 6,
-	V4L2_MPEG_AUDIO_L3_BITRATE_112K = 7,
-	V4L2_MPEG_AUDIO_L3_BITRATE_128K = 8,
-	V4L2_MPEG_AUDIO_L3_BITRATE_160K = 9,
-	V4L2_MPEG_AUDIO_L3_BITRATE_192K = 10,
-	V4L2_MPEG_AUDIO_L3_BITRATE_224K = 11,
-	V4L2_MPEG_AUDIO_L3_BITRATE_256K = 12,
-	V4L2_MPEG_AUDIO_L3_BITRATE_320K = 13,
-};
-#define V4L2_CID_MPEG_AUDIO_MODE 		(V4L2_CID_MPEG_BASE+105)
-enum v4l2_mpeg_audio_mode {
-	V4L2_MPEG_AUDIO_MODE_STEREO       = 0,
-	V4L2_MPEG_AUDIO_MODE_JOINT_STEREO = 1,
-	V4L2_MPEG_AUDIO_MODE_DUAL         = 2,
-	V4L2_MPEG_AUDIO_MODE_MONO         = 3,
-};
-#define V4L2_CID_MPEG_AUDIO_MODE_EXTENSION 	(V4L2_CID_MPEG_BASE+106)
-enum v4l2_mpeg_audio_mode_extension {
-	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_4  = 0,
-	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_8  = 1,
-	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_12 = 2,
-	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_16 = 3,
-};
-#define V4L2_CID_MPEG_AUDIO_EMPHASIS 		(V4L2_CID_MPEG_BASE+107)
-enum v4l2_mpeg_audio_emphasis {
-	V4L2_MPEG_AUDIO_EMPHASIS_NONE         = 0,
-	V4L2_MPEG_AUDIO_EMPHASIS_50_DIV_15_uS = 1,
-	V4L2_MPEG_AUDIO_EMPHASIS_CCITT_J17    = 2,
-};
-#define V4L2_CID_MPEG_AUDIO_CRC 		(V4L2_CID_MPEG_BASE+108)
-enum v4l2_mpeg_audio_crc {
-	V4L2_MPEG_AUDIO_CRC_NONE  = 0,
-	V4L2_MPEG_AUDIO_CRC_CRC16 = 1,
-};
-#define V4L2_CID_MPEG_AUDIO_MUTE 		(V4L2_CID_MPEG_BASE+109)
-#define V4L2_CID_MPEG_AUDIO_AAC_BITRATE		(V4L2_CID_MPEG_BASE+110)
-#define V4L2_CID_MPEG_AUDIO_AC3_BITRATE		(V4L2_CID_MPEG_BASE+111)
-enum v4l2_mpeg_audio_ac3_bitrate {
-	V4L2_MPEG_AUDIO_AC3_BITRATE_32K  = 0,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_40K  = 1,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_48K  = 2,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_56K  = 3,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_64K  = 4,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_80K  = 5,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_96K  = 6,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_112K = 7,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_128K = 8,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_160K = 9,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_192K = 10,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_224K = 11,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_256K = 12,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_320K = 13,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_384K = 14,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_448K = 15,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_512K = 16,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_576K = 17,
-	V4L2_MPEG_AUDIO_AC3_BITRATE_640K = 18,
-};
-#define V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK	(V4L2_CID_MPEG_BASE+112)
-enum v4l2_mpeg_audio_dec_playback {
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_AUTO	    = 0,
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_STEREO	    = 1,
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_LEFT	    = 2,
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_RIGHT	    = 3,
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_MONO	    = 4,
-	V4L2_MPEG_AUDIO_DEC_PLAYBACK_SWAPPED_STEREO = 5,
-};
-#define V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK (V4L2_CID_MPEG_BASE+113)
-
-/*  MPEG video controls specific to multiplexed streams */
-#define V4L2_CID_MPEG_VIDEO_ENCODING 		(V4L2_CID_MPEG_BASE+200)
-enum v4l2_mpeg_video_encoding {
-	V4L2_MPEG_VIDEO_ENCODING_MPEG_1     = 0,
-	V4L2_MPEG_VIDEO_ENCODING_MPEG_2     = 1,
-	V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC = 2,
-};
-#define V4L2_CID_MPEG_VIDEO_ASPECT 		(V4L2_CID_MPEG_BASE+201)
-enum v4l2_mpeg_video_aspect {
-	V4L2_MPEG_VIDEO_ASPECT_1x1     = 0,
-	V4L2_MPEG_VIDEO_ASPECT_4x3     = 1,
-	V4L2_MPEG_VIDEO_ASPECT_16x9    = 2,
-	V4L2_MPEG_VIDEO_ASPECT_221x100 = 3,
-};
-#define V4L2_CID_MPEG_VIDEO_B_FRAMES 		(V4L2_CID_MPEG_BASE+202)
-#define V4L2_CID_MPEG_VIDEO_GOP_SIZE 		(V4L2_CID_MPEG_BASE+203)
-#define V4L2_CID_MPEG_VIDEO_GOP_CLOSURE 	(V4L2_CID_MPEG_BASE+204)
-#define V4L2_CID_MPEG_VIDEO_PULLDOWN 		(V4L2_CID_MPEG_BASE+205)
-#define V4L2_CID_MPEG_VIDEO_BITRATE_MODE 	(V4L2_CID_MPEG_BASE+206)
-enum v4l2_mpeg_video_bitrate_mode {
-	V4L2_MPEG_VIDEO_BITRATE_MODE_VBR = 0,
-	V4L2_MPEG_VIDEO_BITRATE_MODE_CBR = 1,
-};
-#define V4L2_CID_MPEG_VIDEO_BITRATE 		(V4L2_CID_MPEG_BASE+207)
-#define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK 	(V4L2_CID_MPEG_BASE+208)
-#define V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (V4L2_CID_MPEG_BASE+209)
-#define V4L2_CID_MPEG_VIDEO_MUTE 		(V4L2_CID_MPEG_BASE+210)
-#define V4L2_CID_MPEG_VIDEO_MUTE_YUV 		(V4L2_CID_MPEG_BASE+211)
-#define V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE		(V4L2_CID_MPEG_BASE+212)
-#define V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER	(V4L2_CID_MPEG_BASE+213)
-#define V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB		(V4L2_CID_MPEG_BASE+214)
-#define V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE			(V4L2_CID_MPEG_BASE+215)
-#define V4L2_CID_MPEG_VIDEO_HEADER_MODE				(V4L2_CID_MPEG_BASE+216)
-enum v4l2_mpeg_video_header_mode {
-	V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE			= 0,
-	V4L2_MPEG_VIDEO_HEADER_MODE_JOINED_WITH_1ST_FRAME	= 1,
-
-};
-#define V4L2_CID_MPEG_VIDEO_MAX_REF_PIC			(V4L2_CID_MPEG_BASE+217)
-#define V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE		(V4L2_CID_MPEG_BASE+218)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES	(V4L2_CID_MPEG_BASE+219)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB		(V4L2_CID_MPEG_BASE+220)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE		(V4L2_CID_MPEG_BASE+221)
-enum v4l2_mpeg_video_multi_slice_mode {
-	V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE		= 0,
-	V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB		= 1,
-	V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES	= 2,
-};
-#define V4L2_CID_MPEG_VIDEO_VBV_SIZE			(V4L2_CID_MPEG_BASE+222)
-#define V4L2_CID_MPEG_VIDEO_DEC_PTS			(V4L2_CID_MPEG_BASE+223)
-#define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_MPEG_BASE+224)
-
-#define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
-#define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
-#define V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP		(V4L2_CID_MPEG_BASE+302)
-#define V4L2_CID_MPEG_VIDEO_H263_MIN_QP			(V4L2_CID_MPEG_BASE+303)
-#define V4L2_CID_MPEG_VIDEO_H263_MAX_QP			(V4L2_CID_MPEG_BASE+304)
-#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP		(V4L2_CID_MPEG_BASE+350)
-#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP		(V4L2_CID_MPEG_BASE+351)
-#define V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP		(V4L2_CID_MPEG_BASE+352)
-#define V4L2_CID_MPEG_VIDEO_H264_MIN_QP			(V4L2_CID_MPEG_BASE+353)
-#define V4L2_CID_MPEG_VIDEO_H264_MAX_QP			(V4L2_CID_MPEG_BASE+354)
-#define V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM		(V4L2_CID_MPEG_BASE+355)
-#define V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE		(V4L2_CID_MPEG_BASE+356)
-#define V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE		(V4L2_CID_MPEG_BASE+357)
-enum v4l2_mpeg_video_h264_entropy_mode {
-	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC	= 0,
-	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC	= 1,
-};
-#define V4L2_CID_MPEG_VIDEO_H264_I_PERIOD		(V4L2_CID_MPEG_BASE+358)
-#define V4L2_CID_MPEG_VIDEO_H264_LEVEL			(V4L2_CID_MPEG_BASE+359)
-enum v4l2_mpeg_video_h264_level {
-	V4L2_MPEG_VIDEO_H264_LEVEL_1_0	= 0,
-	V4L2_MPEG_VIDEO_H264_LEVEL_1B	= 1,
-	V4L2_MPEG_VIDEO_H264_LEVEL_1_1	= 2,
-	V4L2_MPEG_VIDEO_H264_LEVEL_1_2	= 3,
-	V4L2_MPEG_VIDEO_H264_LEVEL_1_3	= 4,
-	V4L2_MPEG_VIDEO_H264_LEVEL_2_0	= 5,
-	V4L2_MPEG_VIDEO_H264_LEVEL_2_1	= 6,
-	V4L2_MPEG_VIDEO_H264_LEVEL_2_2	= 7,
-	V4L2_MPEG_VIDEO_H264_LEVEL_3_0	= 8,
-	V4L2_MPEG_VIDEO_H264_LEVEL_3_1	= 9,
-	V4L2_MPEG_VIDEO_H264_LEVEL_3_2	= 10,
-	V4L2_MPEG_VIDEO_H264_LEVEL_4_0	= 11,
-	V4L2_MPEG_VIDEO_H264_LEVEL_4_1	= 12,
-	V4L2_MPEG_VIDEO_H264_LEVEL_4_2	= 13,
-	V4L2_MPEG_VIDEO_H264_LEVEL_5_0	= 14,
-	V4L2_MPEG_VIDEO_H264_LEVEL_5_1	= 15,
-};
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA	(V4L2_CID_MPEG_BASE+360)
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA	(V4L2_CID_MPEG_BASE+361)
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE	(V4L2_CID_MPEG_BASE+362)
-enum v4l2_mpeg_video_h264_loop_filter_mode {
-	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED				= 0,
-	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED				= 1,
-	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY	= 2,
-};
-#define V4L2_CID_MPEG_VIDEO_H264_PROFILE		(V4L2_CID_MPEG_BASE+363)
-enum v4l2_mpeg_video_h264_profile {
-	V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE			= 0,
-	V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE	= 1,
-	V4L2_MPEG_VIDEO_H264_PROFILE_MAIN			= 2,
-	V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED			= 3,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH			= 4,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10			= 5,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422			= 6,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE	= 7,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA		= 8,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA		= 9,
-	V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA		= 10,
-	V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA		= 11,
-	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE		= 12,
-	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH		= 13,
-	V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA	= 14,
-	V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH		= 15,
-	V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
-};
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT	(V4L2_CID_MPEG_BASE+364)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH	(V4L2_CID_MPEG_BASE+365)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE		(V4L2_CID_MPEG_BASE+366)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC		(V4L2_CID_MPEG_BASE+367)
-enum v4l2_mpeg_video_h264_vui_sar_idc {
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_UNSPECIFIED	= 0,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_1x1		= 1,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_12x11		= 2,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_10x11		= 3,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_16x11		= 4,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_40x33		= 5,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_24x11		= 6,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_20x11		= 7,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_32x11		= 8,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_80x33		= 9,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_18x11		= 10,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_15x11		= 11,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_64x33		= 12,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_160x99		= 13,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_4x3		= 14,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_3x2		= 15,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_2x1		= 16,
-	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_EXTENDED	= 17,
-};
-#define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_MPEG_BASE+400)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_MPEG_BASE+401)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_MPEG_BASE+402)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP	(V4L2_CID_MPEG_BASE+403)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP	(V4L2_CID_MPEG_BASE+404)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL		(V4L2_CID_MPEG_BASE+405)
-enum v4l2_mpeg_video_mpeg4_level {
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0	= 0,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0B	= 1,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_1	= 2,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_2	= 3,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_3	= 4,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_3B	= 5,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_4	= 6,
-	V4L2_MPEG_VIDEO_MPEG4_LEVEL_5	= 7,
-};
-#define V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE	(V4L2_CID_MPEG_BASE+406)
-enum v4l2_mpeg_video_mpeg4_profile {
-	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE				= 0,
-	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_SIMPLE			= 1,
-	V4L2_MPEG_VIDEO_MPEG4_PROFILE_CORE				= 2,
-	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE_SCALABLE			= 3,
-	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_CODING_EFFICIENCY	= 4,
-};
-#define V4L2_CID_MPEG_VIDEO_MPEG4_QPEL		(V4L2_CID_MPEG_BASE+407)
-
-/*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
-#define V4L2_CID_MPEG_CX2341X_BASE 				(V4L2_CTRL_CLASS_MPEG | 0x1000)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+0)
-enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
-	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_MANUAL = 0,
-	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_AUTO   = 1,
-};
-#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER 		(V4L2_CID_MPEG_CX2341X_BASE+1)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE 	(V4L2_CID_MPEG_CX2341X_BASE+2)
-enum v4l2_mpeg_cx2341x_video_luma_spatial_filter_type {
-	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_OFF                  = 0,
-	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_HOR               = 1,
-	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_VERT              = 2,
-	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_HV_SEPARABLE      = 3,
-	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_SYM_NON_SEPARABLE = 4,
-};
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE 	(V4L2_CID_MPEG_CX2341X_BASE+3)
-enum v4l2_mpeg_cx2341x_video_chroma_spatial_filter_type {
-	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_OFF    = 0,
-	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_1D_HOR = 1,
-};
-#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+4)
-enum v4l2_mpeg_cx2341x_video_temporal_filter_mode {
-	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_MANUAL = 0,
-	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_AUTO   = 1,
-};
-#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER 		(V4L2_CID_MPEG_CX2341X_BASE+5)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE 		(V4L2_CID_MPEG_CX2341X_BASE+6)
-enum v4l2_mpeg_cx2341x_video_median_filter_type {
-	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_OFF      = 0,
-	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR      = 1,
-	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_VERT     = 2,
-	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR_VERT = 3,
-	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_DIAG     = 4,
-};
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM 	(V4L2_CID_MPEG_CX2341X_BASE+7)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP 	(V4L2_CID_MPEG_CX2341X_BASE+8)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_MPEG_CX2341X_BASE+9)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP 	(V4L2_CID_MPEG_CX2341X_BASE+10)
-#define V4L2_CID_MPEG_CX2341X_STREAM_INSERT_NAV_PACKETS 	(V4L2_CID_MPEG_CX2341X_BASE+11)
-
-/*  MPEG-class control IDs specific to the Samsung MFC 5.1 driver as defined by V4L2 */
-#define V4L2_CID_MPEG_MFC51_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1100)
-
-#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY		(V4L2_CID_MPEG_MFC51_BASE+0)
-#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY_ENABLE	(V4L2_CID_MPEG_MFC51_BASE+1)
-#define V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE			(V4L2_CID_MPEG_MFC51_BASE+2)
-enum v4l2_mpeg_mfc51_video_frame_skip_mode {
-	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_DISABLED		= 0,
-	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT	= 1,
-	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT		= 2,
-};
-#define V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE			(V4L2_CID_MPEG_MFC51_BASE+3)
-enum v4l2_mpeg_mfc51_video_force_frame_type {
-	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_DISABLED		= 0,
-	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_I_FRAME		= 1,
-	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_NOT_CODED	= 2,
-};
-#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING				(V4L2_CID_MPEG_MFC51_BASE+4)
-#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING_YUV				(V4L2_CID_MPEG_MFC51_BASE+5)
-#define V4L2_CID_MPEG_MFC51_VIDEO_RC_FIXED_TARGET_BIT			(V4L2_CID_MPEG_MFC51_BASE+6)
-#define V4L2_CID_MPEG_MFC51_VIDEO_RC_REACTION_COEFF			(V4L2_CID_MPEG_MFC51_BASE+7)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_ACTIVITY		(V4L2_CID_MPEG_MFC51_BASE+50)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_DARK			(V4L2_CID_MPEG_MFC51_BASE+51)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_SMOOTH		(V4L2_CID_MPEG_MFC51_BASE+52)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_STATIC		(V4L2_CID_MPEG_MFC51_BASE+53)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_NUM_REF_PIC_FOR_P		(V4L2_CID_MPEG_MFC51_BASE+54)
-
-/*  Camera class control IDs */
-#define V4L2_CID_CAMERA_CLASS_BASE 	(V4L2_CTRL_CLASS_CAMERA | 0x900)
-#define V4L2_CID_CAMERA_CLASS 		(V4L2_CTRL_CLASS_CAMERA | 1)
-
-#define V4L2_CID_EXPOSURE_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+1)
-enum  v4l2_exposure_auto_type {
-	V4L2_EXPOSURE_AUTO = 0,
-	V4L2_EXPOSURE_MANUAL = 1,
-	V4L2_EXPOSURE_SHUTTER_PRIORITY = 2,
-	V4L2_EXPOSURE_APERTURE_PRIORITY = 3
-};
-#define V4L2_CID_EXPOSURE_ABSOLUTE		(V4L2_CID_CAMERA_CLASS_BASE+2)
-#define V4L2_CID_EXPOSURE_AUTO_PRIORITY		(V4L2_CID_CAMERA_CLASS_BASE+3)
-
-#define V4L2_CID_PAN_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+4)
-#define V4L2_CID_TILT_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+5)
-#define V4L2_CID_PAN_RESET			(V4L2_CID_CAMERA_CLASS_BASE+6)
-#define V4L2_CID_TILT_RESET			(V4L2_CID_CAMERA_CLASS_BASE+7)
-
-#define V4L2_CID_PAN_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+8)
-#define V4L2_CID_TILT_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+9)
-
-#define V4L2_CID_FOCUS_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+10)
-#define V4L2_CID_FOCUS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+11)
-#define V4L2_CID_FOCUS_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+12)
-
-#define V4L2_CID_ZOOM_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+13)
-#define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
-#define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
-
-#define V4L2_CID_PRIVACY			(V4L2_CID_CAMERA_CLASS_BASE+16)
-
-#define V4L2_CID_IRIS_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+17)
-#define V4L2_CID_IRIS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+18)
-
-#define V4L2_CID_AUTO_EXPOSURE_BIAS		(V4L2_CID_CAMERA_CLASS_BASE+19)
-
-#define V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE	(V4L2_CID_CAMERA_CLASS_BASE+20)
-enum v4l2_auto_n_preset_white_balance {
-	V4L2_WHITE_BALANCE_MANUAL		= 0,
-	V4L2_WHITE_BALANCE_AUTO			= 1,
-	V4L2_WHITE_BALANCE_INCANDESCENT		= 2,
-	V4L2_WHITE_BALANCE_FLUORESCENT		= 3,
-	V4L2_WHITE_BALANCE_FLUORESCENT_H	= 4,
-	V4L2_WHITE_BALANCE_HORIZON		= 5,
-	V4L2_WHITE_BALANCE_DAYLIGHT		= 6,
-	V4L2_WHITE_BALANCE_FLASH		= 7,
-	V4L2_WHITE_BALANCE_CLOUDY		= 8,
-	V4L2_WHITE_BALANCE_SHADE		= 9,
-};
-
-#define V4L2_CID_WIDE_DYNAMIC_RANGE		(V4L2_CID_CAMERA_CLASS_BASE+21)
-#define V4L2_CID_IMAGE_STABILIZATION		(V4L2_CID_CAMERA_CLASS_BASE+22)
-
-#define V4L2_CID_ISO_SENSITIVITY		(V4L2_CID_CAMERA_CLASS_BASE+23)
-#define V4L2_CID_ISO_SENSITIVITY_AUTO		(V4L2_CID_CAMERA_CLASS_BASE+24)
-enum v4l2_iso_sensitivity_auto_type {
-	V4L2_ISO_SENSITIVITY_MANUAL		= 0,
-	V4L2_ISO_SENSITIVITY_AUTO		= 1,
-};
-
-#define V4L2_CID_EXPOSURE_METERING		(V4L2_CID_CAMERA_CLASS_BASE+25)
-enum v4l2_exposure_metering {
-	V4L2_EXPOSURE_METERING_AVERAGE		= 0,
-	V4L2_EXPOSURE_METERING_CENTER_WEIGHTED	= 1,
-	V4L2_EXPOSURE_METERING_SPOT		= 2,
-};
-
-#define V4L2_CID_SCENE_MODE			(V4L2_CID_CAMERA_CLASS_BASE+26)
-enum v4l2_scene_mode {
-	V4L2_SCENE_MODE_NONE			= 0,
-	V4L2_SCENE_MODE_BACKLIGHT		= 1,
-	V4L2_SCENE_MODE_BEACH_SNOW		= 2,
-	V4L2_SCENE_MODE_CANDLE_LIGHT		= 3,
-	V4L2_SCENE_MODE_DAWN_DUSK		= 4,
-	V4L2_SCENE_MODE_FALL_COLORS		= 5,
-	V4L2_SCENE_MODE_FIREWORKS		= 6,
-	V4L2_SCENE_MODE_LANDSCAPE		= 7,
-	V4L2_SCENE_MODE_NIGHT			= 8,
-	V4L2_SCENE_MODE_PARTY_INDOOR		= 9,
-	V4L2_SCENE_MODE_PORTRAIT		= 10,
-	V4L2_SCENE_MODE_SPORTS			= 11,
-	V4L2_SCENE_MODE_SUNSET			= 12,
-	V4L2_SCENE_MODE_TEXT			= 13,
-};
-
-#define V4L2_CID_3A_LOCK			(V4L2_CID_CAMERA_CLASS_BASE+27)
-#define V4L2_LOCK_EXPOSURE			(1 << 0)
-#define V4L2_LOCK_WHITE_BALANCE			(1 << 1)
-#define V4L2_LOCK_FOCUS				(1 << 2)
-
-#define V4L2_CID_AUTO_FOCUS_START		(V4L2_CID_CAMERA_CLASS_BASE+28)
-#define V4L2_CID_AUTO_FOCUS_STOP		(V4L2_CID_CAMERA_CLASS_BASE+29)
-#define V4L2_CID_AUTO_FOCUS_STATUS		(V4L2_CID_CAMERA_CLASS_BASE+30)
-#define V4L2_AUTO_FOCUS_STATUS_IDLE		(0 << 0)
-#define V4L2_AUTO_FOCUS_STATUS_BUSY		(1 << 0)
-#define V4L2_AUTO_FOCUS_STATUS_REACHED		(1 << 1)
-#define V4L2_AUTO_FOCUS_STATUS_FAILED		(1 << 2)
-
-#define V4L2_CID_AUTO_FOCUS_RANGE		(V4L2_CID_CAMERA_CLASS_BASE+31)
-enum v4l2_auto_focus_range {
-	V4L2_AUTO_FOCUS_RANGE_AUTO		= 0,
-	V4L2_AUTO_FOCUS_RANGE_NORMAL		= 1,
-	V4L2_AUTO_FOCUS_RANGE_MACRO		= 2,
-	V4L2_AUTO_FOCUS_RANGE_INFINITY		= 3,
-};
-
-/* FM Modulator class control IDs */
-#define V4L2_CID_FM_TX_CLASS_BASE		(V4L2_CTRL_CLASS_FM_TX | 0x900)
-#define V4L2_CID_FM_TX_CLASS			(V4L2_CTRL_CLASS_FM_TX | 1)
-
-#define V4L2_CID_RDS_TX_DEVIATION		(V4L2_CID_FM_TX_CLASS_BASE + 1)
-#define V4L2_CID_RDS_TX_PI			(V4L2_CID_FM_TX_CLASS_BASE + 2)
-#define V4L2_CID_RDS_TX_PTY			(V4L2_CID_FM_TX_CLASS_BASE + 3)
-#define V4L2_CID_RDS_TX_PS_NAME			(V4L2_CID_FM_TX_CLASS_BASE + 5)
-#define V4L2_CID_RDS_TX_RADIO_TEXT		(V4L2_CID_FM_TX_CLASS_BASE + 6)
-
-#define V4L2_CID_AUDIO_LIMITER_ENABLED		(V4L2_CID_FM_TX_CLASS_BASE + 64)
-#define V4L2_CID_AUDIO_LIMITER_RELEASE_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 65)
-#define V4L2_CID_AUDIO_LIMITER_DEVIATION	(V4L2_CID_FM_TX_CLASS_BASE + 66)
-
-#define V4L2_CID_AUDIO_COMPRESSION_ENABLED	(V4L2_CID_FM_TX_CLASS_BASE + 80)
-#define V4L2_CID_AUDIO_COMPRESSION_GAIN		(V4L2_CID_FM_TX_CLASS_BASE + 81)
-#define V4L2_CID_AUDIO_COMPRESSION_THRESHOLD	(V4L2_CID_FM_TX_CLASS_BASE + 82)
-#define V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 83)
-#define V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 84)
-
-#define V4L2_CID_PILOT_TONE_ENABLED		(V4L2_CID_FM_TX_CLASS_BASE + 96)
-#define V4L2_CID_PILOT_TONE_DEVIATION		(V4L2_CID_FM_TX_CLASS_BASE + 97)
-#define V4L2_CID_PILOT_TONE_FREQUENCY		(V4L2_CID_FM_TX_CLASS_BASE + 98)
-
-#define V4L2_CID_TUNE_PREEMPHASIS		(V4L2_CID_FM_TX_CLASS_BASE + 112)
-enum v4l2_preemphasis {
-	V4L2_PREEMPHASIS_DISABLED	= 0,
-	V4L2_PREEMPHASIS_50_uS		= 1,
-	V4L2_PREEMPHASIS_75_uS		= 2,
-};
-#define V4L2_CID_TUNE_POWER_LEVEL		(V4L2_CID_FM_TX_CLASS_BASE + 113)
-#define V4L2_CID_TUNE_ANTENNA_CAPACITOR		(V4L2_CID_FM_TX_CLASS_BASE + 114)
-
-/* Flash and privacy (indicator) light controls */
-#define V4L2_CID_FLASH_CLASS_BASE		(V4L2_CTRL_CLASS_FLASH | 0x900)
-#define V4L2_CID_FLASH_CLASS			(V4L2_CTRL_CLASS_FLASH | 1)
-
-#define V4L2_CID_FLASH_LED_MODE			(V4L2_CID_FLASH_CLASS_BASE + 1)
-enum v4l2_flash_led_mode {
-	V4L2_FLASH_LED_MODE_NONE,
-	V4L2_FLASH_LED_MODE_FLASH,
-	V4L2_FLASH_LED_MODE_TORCH,
-};
-
-#define V4L2_CID_FLASH_STROBE_SOURCE		(V4L2_CID_FLASH_CLASS_BASE + 2)
-enum v4l2_flash_strobe_source {
-	V4L2_FLASH_STROBE_SOURCE_SOFTWARE,
-	V4L2_FLASH_STROBE_SOURCE_EXTERNAL,
-};
-
-#define V4L2_CID_FLASH_STROBE			(V4L2_CID_FLASH_CLASS_BASE + 3)
-#define V4L2_CID_FLASH_STROBE_STOP		(V4L2_CID_FLASH_CLASS_BASE + 4)
-#define V4L2_CID_FLASH_STROBE_STATUS		(V4L2_CID_FLASH_CLASS_BASE + 5)
-
-#define V4L2_CID_FLASH_TIMEOUT			(V4L2_CID_FLASH_CLASS_BASE + 6)
-#define V4L2_CID_FLASH_INTENSITY		(V4L2_CID_FLASH_CLASS_BASE + 7)
-#define V4L2_CID_FLASH_TORCH_INTENSITY		(V4L2_CID_FLASH_CLASS_BASE + 8)
-#define V4L2_CID_FLASH_INDICATOR_INTENSITY	(V4L2_CID_FLASH_CLASS_BASE + 9)
-
-#define V4L2_CID_FLASH_FAULT			(V4L2_CID_FLASH_CLASS_BASE + 10)
-#define V4L2_FLASH_FAULT_OVER_VOLTAGE		(1 << 0)
-#define V4L2_FLASH_FAULT_TIMEOUT		(1 << 1)
-#define V4L2_FLASH_FAULT_OVER_TEMPERATURE	(1 << 2)
-#define V4L2_FLASH_FAULT_SHORT_CIRCUIT		(1 << 3)
-#define V4L2_FLASH_FAULT_OVER_CURRENT		(1 << 4)
-#define V4L2_FLASH_FAULT_INDICATOR		(1 << 5)
-
-#define V4L2_CID_FLASH_CHARGE			(V4L2_CID_FLASH_CLASS_BASE + 11)
-#define V4L2_CID_FLASH_READY			(V4L2_CID_FLASH_CLASS_BASE + 12)
-
-/*  JPEG-class control IDs defined by V4L2 */
-#define V4L2_CID_JPEG_CLASS_BASE		(V4L2_CTRL_CLASS_JPEG | 0x900)
-#define V4L2_CID_JPEG_CLASS			(V4L2_CTRL_CLASS_JPEG | 1)
-
-#define	V4L2_CID_JPEG_CHROMA_SUBSAMPLING	(V4L2_CID_JPEG_CLASS_BASE + 1)
-enum v4l2_jpeg_chroma_subsampling {
-	V4L2_JPEG_CHROMA_SUBSAMPLING_444	= 0,
-	V4L2_JPEG_CHROMA_SUBSAMPLING_422	= 1,
-	V4L2_JPEG_CHROMA_SUBSAMPLING_420	= 2,
-	V4L2_JPEG_CHROMA_SUBSAMPLING_411	= 3,
-	V4L2_JPEG_CHROMA_SUBSAMPLING_410	= 4,
-	V4L2_JPEG_CHROMA_SUBSAMPLING_GRAY	= 5,
-};
-#define	V4L2_CID_JPEG_RESTART_INTERVAL		(V4L2_CID_JPEG_CLASS_BASE + 2)
-#define	V4L2_CID_JPEG_COMPRESSION_QUALITY	(V4L2_CID_JPEG_CLASS_BASE + 3)
-
-#define	V4L2_CID_JPEG_ACTIVE_MARKER		(V4L2_CID_JPEG_CLASS_BASE + 4)
-#define	V4L2_JPEG_ACTIVE_MARKER_APP0		(1 << 0)
-#define	V4L2_JPEG_ACTIVE_MARKER_APP1		(1 << 1)
-#define	V4L2_JPEG_ACTIVE_MARKER_COM		(1 << 16)
-#define	V4L2_JPEG_ACTIVE_MARKER_DQT		(1 << 17)
-#define	V4L2_JPEG_ACTIVE_MARKER_DHT		(1 << 18)
-
-/* Image source controls */
-#define V4L2_CID_IMAGE_SOURCE_CLASS_BASE	(V4L2_CTRL_CLASS_IMAGE_SOURCE | 0x900)
-#define V4L2_CID_IMAGE_SOURCE_CLASS		(V4L2_CTRL_CLASS_IMAGE_SOURCE | 1)
-
-#define V4L2_CID_VBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 1)
-#define V4L2_CID_HBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 2)
-#define V4L2_CID_ANALOGUE_GAIN			(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 3)
-
-/* Image processing controls */
-#define V4L2_CID_IMAGE_PROC_CLASS_BASE		(V4L2_CTRL_CLASS_IMAGE_PROC | 0x900)
-#define V4L2_CID_IMAGE_PROC_CLASS		(V4L2_CTRL_CLASS_IMAGE_PROC | 1)
-
-#define V4L2_CID_LINK_FREQ			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 1)
-#define V4L2_CID_PIXEL_RATE			(V4L2_CID_IMAGE_PROC_CLASS_BASE + 2)
 
 /*  DV-class control IDs defined by V4L2 */
 #define V4L2_CID_DV_CLASS_BASE			(V4L2_CTRL_CLASS_DV | 0x900)
-- 
cgit v1.2.3


From 633c98e52a64ac8548b5d27dca1497cd4f0a6d4c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 17 Sep 2012 05:02:26 -0300
Subject: [media] v4l2-core: deprecate V4L2_BUF_TYPE_PRIVATE

This buffer type isn't used at all, and since it is effectively undefined
what it should do it is deprecated. The define still exists, but any
internal support for such buffers is removed.
The decisions to deprecate this was taken during the 2012 Media Workshop.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 1b72a38b7c8c..1ed8f904567d 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -162,6 +162,7 @@ enum v4l2_buf_type {
 #endif
 	V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE = 9,
 	V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE  = 10,
+	/* Deprecated, do not use */
 	V4L2_BUF_TYPE_PRIVATE              = 0x80,
 };
 
-- 
cgit v1.2.3


From 1c4f3c987f44a6653ac49f93d8cbd3adb539be10 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 3 Sep 2012 10:38:41 -0300
Subject: [media] Rename V4L2_(IN|OUT)_CAP_CUSTOM_TIMINGS

The 'custom' timings are no longer just for custom timings, but also for standard
CEA/VESA timings. So rename to V4L2_IN/OUT_CAP_DV_TIMINGS.
The old define is still kept for backwards compatibility.
This decision was taken during the 2012 Media Workshop.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 1ed8f904567d..61395ef85a00 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1191,7 +1191,8 @@ struct v4l2_input {
 
 /* capabilities flags */
 #define V4L2_IN_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
-#define V4L2_IN_CAP_CUSTOM_TIMINGS	0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_IN_CAP_DV_TIMINGS		0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_IN_CAP_CUSTOM_TIMINGS	V4L2_IN_CAP_DV_TIMINGS /* For compatibility */
 #define V4L2_IN_CAP_STD			0x00000004 /* Supports S_STD */
 
 /*
@@ -1214,7 +1215,8 @@ struct v4l2_output {
 
 /* capabilities flags */
 #define V4L2_OUT_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
-#define V4L2_OUT_CAP_CUSTOM_TIMINGS	0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_OUT_CAP_DV_TIMINGS		0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_OUT_CAP_CUSTOM_TIMINGS	V4L2_OUT_CAP_DV_TIMINGS /* For compatibility */
 #define V4L2_OUT_CAP_STD		0x00000004 /* Supports S_STD */
 
 /*
-- 
cgit v1.2.3


From 8a2697abc1f0388d44b78ac109d9f03ec75c2683 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Wed, 11 Jul 2012 21:54:50 -0300
Subject: [media] add LNA support for DVB API

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 57e2b1763109..c12d452cb40d 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -363,8 +363,9 @@ struct dvb_frontend_event {
 #define DTV_ATSCMH_SCCC_CODE_MODE_D	59
 
 #define DTV_INTERLEAVING			60
+#define DTV_LNA					61
 
-#define DTV_MAX_COMMAND				DTV_INTERLEAVING
+#define DTV_MAX_COMMAND				DTV_LNA
 
 typedef enum fe_pilot {
 	PILOT_ON,
@@ -438,6 +439,7 @@ enum atscmh_rs_code_mode {
 };
 
 #define NO_STREAM_ID_FILTER	(~0U)
+#define LNA_AUTO                (~0U)
 
 struct dtv_cmds_h {
 	char	*name;		/* A display name for debugging purposes */
-- 
cgit v1.2.3